thailint 0.1.5__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- src/__init__.py +7 -2
- src/analyzers/__init__.py +23 -0
- src/analyzers/typescript_base.py +148 -0
- src/api.py +1 -1
- src/cli.py +498 -141
- src/config.py +6 -31
- src/core/base.py +12 -0
- src/core/cli_utils.py +206 -0
- src/core/config_parser.py +99 -0
- src/core/linter_utils.py +168 -0
- src/core/registry.py +17 -92
- src/core/rule_discovery.py +132 -0
- src/core/violation_builder.py +122 -0
- src/linter_config/ignore.py +112 -40
- src/linter_config/loader.py +3 -13
- src/linters/dry/__init__.py +23 -0
- src/linters/dry/base_token_analyzer.py +76 -0
- src/linters/dry/block_filter.py +262 -0
- src/linters/dry/block_grouper.py +59 -0
- src/linters/dry/cache.py +218 -0
- src/linters/dry/cache_query.py +61 -0
- src/linters/dry/config.py +130 -0
- src/linters/dry/config_loader.py +44 -0
- src/linters/dry/deduplicator.py +120 -0
- src/linters/dry/duplicate_storage.py +126 -0
- src/linters/dry/file_analyzer.py +127 -0
- src/linters/dry/inline_ignore.py +140 -0
- src/linters/dry/linter.py +170 -0
- src/linters/dry/python_analyzer.py +517 -0
- src/linters/dry/storage_initializer.py +51 -0
- src/linters/dry/token_hasher.py +115 -0
- src/linters/dry/typescript_analyzer.py +590 -0
- src/linters/dry/violation_builder.py +74 -0
- src/linters/dry/violation_filter.py +91 -0
- src/linters/dry/violation_generator.py +174 -0
- src/linters/file_placement/config_loader.py +86 -0
- src/linters/file_placement/directory_matcher.py +80 -0
- src/linters/file_placement/linter.py +252 -472
- src/linters/file_placement/path_resolver.py +61 -0
- src/linters/file_placement/pattern_matcher.py +55 -0
- src/linters/file_placement/pattern_validator.py +106 -0
- src/linters/file_placement/rule_checker.py +229 -0
- src/linters/file_placement/violation_factory.py +177 -0
- src/linters/nesting/config.py +13 -3
- src/linters/nesting/linter.py +76 -152
- src/linters/nesting/typescript_analyzer.py +38 -102
- src/linters/nesting/typescript_function_extractor.py +130 -0
- src/linters/nesting/violation_builder.py +139 -0
- src/linters/srp/__init__.py +99 -0
- src/linters/srp/class_analyzer.py +113 -0
- src/linters/srp/config.py +76 -0
- src/linters/srp/heuristics.py +89 -0
- src/linters/srp/linter.py +225 -0
- src/linters/srp/metrics_evaluator.py +47 -0
- src/linters/srp/python_analyzer.py +72 -0
- src/linters/srp/typescript_analyzer.py +75 -0
- src/linters/srp/typescript_metrics_calculator.py +90 -0
- src/linters/srp/violation_builder.py +117 -0
- src/orchestrator/core.py +42 -7
- src/utils/__init__.py +4 -0
- src/utils/project_root.py +84 -0
- {thailint-0.1.5.dist-info → thailint-0.2.0.dist-info}/METADATA +414 -63
- thailint-0.2.0.dist-info/RECORD +75 -0
- src/.ai/layout.yaml +0 -48
- thailint-0.1.5.dist-info/RECORD +0 -28
- {thailint-0.1.5.dist-info → thailint-0.2.0.dist-info}/LICENSE +0 -0
- {thailint-0.1.5.dist-info → thailint-0.2.0.dist-info}/WHEEL +0 -0
- {thailint-0.1.5.dist-info → thailint-0.2.0.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Purpose: Query service for DRY cache database
|
|
3
|
+
|
|
4
|
+
Scope: Handles SQL queries for duplicate hash detection
|
|
5
|
+
|
|
6
|
+
Overview: Provides query methods for finding duplicate code blocks in the SQLite cache. Extracts
|
|
7
|
+
query logic from DRYCache to maintain SRP compliance. Handles queries for duplicate hashes
|
|
8
|
+
and blocks by hash value.
|
|
9
|
+
|
|
10
|
+
Dependencies: sqlite3.Connection
|
|
11
|
+
|
|
12
|
+
Exports: CacheQueryService class
|
|
13
|
+
|
|
14
|
+
Interfaces: CacheQueryService.get_duplicate_hashes(db), find_duplicates_by_hash(db, hash_value)
|
|
15
|
+
|
|
16
|
+
Implementation: SQL queries for duplicate detection, returns hash values and block data
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
import sqlite3
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class CacheQueryService:
|
|
23
|
+
"""Handles cache database queries."""
|
|
24
|
+
|
|
25
|
+
def get_duplicate_hashes(self, db: sqlite3.Connection) -> list[int]:
|
|
26
|
+
"""Get all hash values that appear 2+ times.
|
|
27
|
+
|
|
28
|
+
Args:
|
|
29
|
+
db: Database connection
|
|
30
|
+
|
|
31
|
+
Returns:
|
|
32
|
+
List of hash values with 2 or more occurrences
|
|
33
|
+
"""
|
|
34
|
+
cursor = db.execute(
|
|
35
|
+
"""SELECT hash_value
|
|
36
|
+
FROM code_blocks
|
|
37
|
+
GROUP BY hash_value
|
|
38
|
+
HAVING COUNT(*) >= 2"""
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
return [row[0] for row in cursor]
|
|
42
|
+
|
|
43
|
+
def find_blocks_by_hash(self, db: sqlite3.Connection, hash_value: int) -> list[tuple]:
|
|
44
|
+
"""Find all blocks with given hash value.
|
|
45
|
+
|
|
46
|
+
Args:
|
|
47
|
+
db: Database connection
|
|
48
|
+
hash_value: Hash to search for
|
|
49
|
+
|
|
50
|
+
Returns:
|
|
51
|
+
List of tuples (file_path, start_line, end_line, snippet, hash_value)
|
|
52
|
+
"""
|
|
53
|
+
cursor = db.execute(
|
|
54
|
+
"""SELECT file_path, start_line, end_line, snippet, hash_value
|
|
55
|
+
FROM code_blocks
|
|
56
|
+
WHERE hash_value = ?
|
|
57
|
+
ORDER BY file_path, start_line""",
|
|
58
|
+
(hash_value,),
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
return cursor.fetchall()
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Purpose: Configuration schema for DRY linter with caching support
|
|
3
|
+
|
|
4
|
+
Scope: DRYConfig dataclass with validation, defaults, and loading from dictionary
|
|
5
|
+
|
|
6
|
+
Overview: Defines configuration structure for the DRY linter including duplicate detection thresholds,
|
|
7
|
+
caching settings, and ignore patterns. Provides validation of configuration values to ensure
|
|
8
|
+
sensible defaults and prevent misconfiguration. Supports loading from YAML configuration files
|
|
9
|
+
through from_dict classmethod. Cache enabled by default for performance on large codebases.
|
|
10
|
+
|
|
11
|
+
Dependencies: Python dataclasses module
|
|
12
|
+
|
|
13
|
+
Exports: DRYConfig dataclass
|
|
14
|
+
|
|
15
|
+
Interfaces: DRYConfig.__init__, DRYConfig.from_dict(config: dict) -> DRYConfig
|
|
16
|
+
|
|
17
|
+
Implementation: Dataclass with field defaults, __post_init__ validation, and dict-based construction
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from dataclasses import dataclass, field
|
|
21
|
+
from typing import Any
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass
|
|
25
|
+
class DRYConfig: # pylint: disable=too-many-instance-attributes
|
|
26
|
+
"""Configuration for DRY linter.
|
|
27
|
+
|
|
28
|
+
Note: Pylint too-many-instance-attributes disabled. This is a configuration
|
|
29
|
+
dataclass serving as a data container for related DRY linter settings.
|
|
30
|
+
All 12 attributes are cohesively related (detection thresholds, language
|
|
31
|
+
overrides, caching, filtering). Splitting would reduce cohesion and make
|
|
32
|
+
configuration loading more complex without meaningful benefit.
|
|
33
|
+
"""
|
|
34
|
+
|
|
35
|
+
enabled: bool = False # Must be explicitly enabled
|
|
36
|
+
min_duplicate_lines: int = 3
|
|
37
|
+
min_duplicate_tokens: int = 30
|
|
38
|
+
min_occurrences: int = 2 # Minimum occurrences to report (default: 2)
|
|
39
|
+
|
|
40
|
+
# Language-specific overrides
|
|
41
|
+
python_min_occurrences: int | None = None
|
|
42
|
+
typescript_min_occurrences: int | None = None
|
|
43
|
+
javascript_min_occurrences: int | None = None
|
|
44
|
+
|
|
45
|
+
# Cache settings
|
|
46
|
+
cache_enabled: bool = True # ON by default for performance
|
|
47
|
+
cache_path: str = ".thailint-cache/dry.db"
|
|
48
|
+
cache_max_age_days: int = 30
|
|
49
|
+
|
|
50
|
+
# Ignore patterns
|
|
51
|
+
ignore_patterns: list[str] = field(default_factory=lambda: ["tests/", "__init__.py"])
|
|
52
|
+
|
|
53
|
+
# Block filters (extensible false positive filtering)
|
|
54
|
+
filters: dict[str, bool] = field(
|
|
55
|
+
default_factory=lambda: {
|
|
56
|
+
"keyword_argument_filter": True, # Filter keyword argument blocks
|
|
57
|
+
"import_group_filter": True, # Filter import statement groups
|
|
58
|
+
}
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
def __post_init__(self) -> None:
|
|
62
|
+
"""Validate configuration values."""
|
|
63
|
+
if self.min_duplicate_lines <= 0:
|
|
64
|
+
raise ValueError(
|
|
65
|
+
f"min_duplicate_lines must be positive, got {self.min_duplicate_lines}"
|
|
66
|
+
)
|
|
67
|
+
if self.min_duplicate_tokens <= 0:
|
|
68
|
+
raise ValueError(
|
|
69
|
+
f"min_duplicate_tokens must be positive, got {self.min_duplicate_tokens}"
|
|
70
|
+
)
|
|
71
|
+
if self.min_occurrences <= 0:
|
|
72
|
+
raise ValueError(f"min_occurrences must be positive, got {self.min_occurrences}")
|
|
73
|
+
|
|
74
|
+
def get_min_occurrences_for_language(self, language: str) -> int:
|
|
75
|
+
"""Get minimum occurrences threshold for a specific language.
|
|
76
|
+
|
|
77
|
+
Args:
|
|
78
|
+
language: Language identifier (e.g., "python", "typescript", "javascript")
|
|
79
|
+
|
|
80
|
+
Returns:
|
|
81
|
+
Minimum occurrences threshold for the language, or global default
|
|
82
|
+
"""
|
|
83
|
+
language_lower = language.lower()
|
|
84
|
+
|
|
85
|
+
language_overrides = {
|
|
86
|
+
"python": self.python_min_occurrences,
|
|
87
|
+
"typescript": self.typescript_min_occurrences,
|
|
88
|
+
"javascript": self.javascript_min_occurrences,
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
override = language_overrides.get(language_lower)
|
|
92
|
+
return override if override is not None else self.min_occurrences
|
|
93
|
+
|
|
94
|
+
@classmethod
|
|
95
|
+
def from_dict(cls, config: dict[str, Any]) -> "DRYConfig":
|
|
96
|
+
"""Load configuration from dictionary.
|
|
97
|
+
|
|
98
|
+
Args:
|
|
99
|
+
config: Dictionary containing configuration values
|
|
100
|
+
|
|
101
|
+
Returns:
|
|
102
|
+
DRYConfig instance with values from dictionary
|
|
103
|
+
"""
|
|
104
|
+
# Extract language-specific min_occurrences
|
|
105
|
+
python_config = config.get("python", {})
|
|
106
|
+
typescript_config = config.get("typescript", {})
|
|
107
|
+
javascript_config = config.get("javascript", {})
|
|
108
|
+
|
|
109
|
+
# Load filter configuration (merge with defaults)
|
|
110
|
+
default_filters = {
|
|
111
|
+
"keyword_argument_filter": True,
|
|
112
|
+
"import_group_filter": True,
|
|
113
|
+
}
|
|
114
|
+
custom_filters = config.get("filters", {})
|
|
115
|
+
filters = {**default_filters, **custom_filters}
|
|
116
|
+
|
|
117
|
+
return cls(
|
|
118
|
+
enabled=config.get("enabled", False),
|
|
119
|
+
min_duplicate_lines=config.get("min_duplicate_lines", 3),
|
|
120
|
+
min_duplicate_tokens=config.get("min_duplicate_tokens", 30),
|
|
121
|
+
min_occurrences=config.get("min_occurrences", 2),
|
|
122
|
+
python_min_occurrences=python_config.get("min_occurrences"),
|
|
123
|
+
typescript_min_occurrences=typescript_config.get("min_occurrences"),
|
|
124
|
+
javascript_min_occurrences=javascript_config.get("min_occurrences"),
|
|
125
|
+
cache_enabled=config.get("cache_enabled", True),
|
|
126
|
+
cache_path=config.get("cache_path", ".thailint-cache/dry.db"),
|
|
127
|
+
cache_max_age_days=config.get("cache_max_age_days", 30),
|
|
128
|
+
ignore_patterns=config.get("ignore", []),
|
|
129
|
+
filters=filters,
|
|
130
|
+
)
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Purpose: Configuration loading from lint context metadata
|
|
3
|
+
|
|
4
|
+
Scope: Extracts and validates DRY configuration from context
|
|
5
|
+
|
|
6
|
+
Overview: Handles extraction of DRY configuration from BaseLintContext metadata dictionary.
|
|
7
|
+
Validates configuration structure and converts to DRYConfig instance. Separates config
|
|
8
|
+
loading logic from main linter rule to maintain SRP compliance.
|
|
9
|
+
|
|
10
|
+
Dependencies: BaseLintContext, DRYConfig
|
|
11
|
+
|
|
12
|
+
Exports: ConfigLoader class
|
|
13
|
+
|
|
14
|
+
Interfaces: ConfigLoader.load_config(context) -> DRYConfig
|
|
15
|
+
|
|
16
|
+
Implementation: Extracts from context metadata, validates dict structure, uses DRYConfig.from_dict()
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from src.core.base import BaseLintContext
|
|
20
|
+
|
|
21
|
+
from .config import DRYConfig
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class ConfigLoader:
    """Extracts DRY linter configuration from a lint context."""

    def load_config(self, context: BaseLintContext) -> DRYConfig:
        """Read the "dry" section of the context metadata.

        Falls back to a default DRYConfig when the context has no metadata
        dict or when the "dry" section is not a mapping.

        Args:
            context: Lint context whose ``metadata`` dict may hold settings

        Returns:
            DRYConfig built from the metadata (or defaults)
        """
        metadata = getattr(context, "metadata", None)
        dry_section = metadata.get("dry", {}) if isinstance(metadata, dict) else None
        if not isinstance(dry_section, dict):
            return DRYConfig()
        return DRYConfig.from_dict(dry_section)
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Purpose: Deduplication utility for overlapping code block violations
|
|
3
|
+
|
|
4
|
+
Scope: Handles filtering of overlapping duplicate code violations
|
|
5
|
+
|
|
6
|
+
Overview: Provides utilities to remove overlapping violations from duplicate code detection results.
|
|
7
|
+
Delegates grouping to BlockGrouper and filtering to ViolationFilter. Handles both block-level
|
|
8
|
+
deduplication (one block per file) and violation-level deduplication (removing overlaps).
|
|
9
|
+
|
|
10
|
+
Dependencies: CodeBlock, Violation, BlockGrouper, ViolationFilter
|
|
11
|
+
|
|
12
|
+
Exports: ViolationDeduplicator class
|
|
13
|
+
|
|
14
|
+
Interfaces: ViolationDeduplicator.deduplicate_blocks(blocks), deduplicate_violations(violations)
|
|
15
|
+
|
|
16
|
+
Implementation: Delegates to BlockGrouper and ViolationFilter for SRP compliance
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from src.core.types import Violation
|
|
20
|
+
|
|
21
|
+
from .block_grouper import BlockGrouper
|
|
22
|
+
from .cache import CodeBlock
|
|
23
|
+
from .violation_filter import ViolationFilter
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class ViolationDeduplicator:
    """Filters out overlapping duplicate-code detection results."""

    def __init__(self) -> None:
        """Create the grouping and filtering collaborators."""
        self._grouper = BlockGrouper()
        self._filter = ViolationFilter()

    def deduplicate_blocks(self, blocks: list[CodeBlock]) -> list[CodeBlock]:
        """Return *blocks* with same-file line-range overlaps removed.

        Rolling-hash windows can yield blocks sharing lines within a file;
        per file, earlier-starting blocks win and later conflicting ones are
        dropped.

        Args:
            blocks: Candidate code blocks, possibly overlapping

        Returns:
            Blocks with no same-file line overlap
        """
        if not blocks:
            return []

        grouped = self._grouper.group_blocks_by_file(blocks)
        return [
            block
            for file_blocks in grouped.values()
            for block in self._keep_non_overlapping(file_blocks)
        ]

    def _keep_non_overlapping(self, file_blocks: list[CodeBlock]) -> list[CodeBlock]:
        """Keep one file's blocks earliest-first, skipping any that overlap a kept block."""
        survivors: list[CodeBlock] = []
        for candidate in sorted(file_blocks, key=lambda b: b.start_line):
            if all(not self._blocks_overlap(candidate, kept) for kept in survivors):
                survivors.append(candidate)
        return survivors

    def _blocks_overlap(self, block1: CodeBlock, block2: CodeBlock) -> bool:
        """Return True when the two blocks share at least one line number."""
        # Disjoint iff one block ends strictly before the other begins.
        return not (block1.end_line < block2.start_line or block2.end_line < block1.start_line)

    def deduplicate_violations(self, violations: list[Violation]) -> list[Violation]:
        """Return *violations* with same-file overlaps removed.

        Args:
            violations: Violations that may overlap within a file

        Returns:
            Deduplicated list of violations
        """
        if not violations:
            return []

        grouped = self._grouper.group_violations_by_file(violations)
        deduplicated: list[Violation] = []
        for file_violations in grouped.values():
            # Violations without a line number sort first (treated as line 0).
            ordered = sorted(file_violations, key=lambda v: v.line or 0)
            deduplicated.extend(self._filter.filter_overlapping(ordered))
        return deduplicated
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Purpose: Storage management for duplicate code blocks with cache and memory fallback
|
|
3
|
+
|
|
4
|
+
Scope: Manages storage of code blocks in SQLite cache or in-memory dict
|
|
5
|
+
|
|
6
|
+
Overview: Provides unified storage interface for code blocks supporting both SQLite-backed caching
|
|
7
|
+
and in-memory fallback when cache disabled. Handles block insertion, retrieval, and duplicate
|
|
8
|
+
hash queries. Encapsulates Decision 6 (in-memory fallback) implementation. Separates storage
|
|
9
|
+
concerns from linting logic to maintain SRP compliance.
|
|
10
|
+
|
|
11
|
+
Dependencies: DRYCache, CodeBlock, Path
|
|
12
|
+
|
|
13
|
+
Exports: DuplicateStorage class
|
|
14
|
+
|
|
15
|
+
Interfaces: DuplicateStorage.add_blocks(file_path, blocks), get_duplicate_hashes(),
|
|
16
|
+
get_blocks_for_hash(hash_value)
|
|
17
|
+
|
|
18
|
+
Implementation: Delegates to either SQLite cache or in-memory dict based on cache_enabled setting
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
|
|
23
|
+
from .cache import CodeBlock, DRYCache
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class DuplicateStorage:
    """Manages storage of code blocks in cache or memory.

    The in-memory dict (hash -> blocks) always backs duplicate detection for
    the current run; the SQLite cache, when provided, additionally persists
    blocks across runs.
    """

    def __init__(self, cache: DRYCache | None) -> None:
        """Initialize storage with optional cache.

        Args:
            cache: SQLite cache instance (None for in-memory mode)
        """
        self._cache = cache
        # hash_value -> all blocks seen this run that share that hash
        self._memory_store: dict[int, list[CodeBlock]] = {}

    def add_blocks(self, file_path: Path, blocks: list[CodeBlock]) -> None:
        """Add code blocks to storage and cache.

        Args:
            file_path: Path to source file
            blocks: List of code blocks to store
        """
        # Always add to memory for duplicate detection
        self._add_to_memory(blocks)

        # Also persist to cache if available
        if self._cache:
            self._add_to_cache(file_path, blocks)

    def add_blocks_to_memory(self, file_path: Path, blocks: list[CodeBlock]) -> None:
        """Add code blocks to in-memory storage only (for cache hits).

        Args:
            file_path: Path to source file (used for cache persistence check)
            blocks: List of code blocks to store
        """
        # Add to memory for duplicate detection this run
        self._add_to_memory(blocks)

        # Guard clauses - early returns for skip conditions
        if not self._cache:
            return

        if not blocks:
            return

        # Update cache with new blocks if needed (for fresh analysis)
        self._update_cache_if_fresh(file_path, blocks)

    def _update_cache_if_fresh(self, file_path: Path, blocks: list[CodeBlock]) -> None:
        """Update cache if file analysis is fresh (not from cache).

        Args:
            file_path: Path to source file
            blocks: List of code blocks to store
        """
        if not self._cache:
            return

        try:
            mtime = file_path.stat().st_mtime
        except OSError:
            # File doesn't exist, skip cache
            return

        # File was analyzed (not cached), so persist if not fresh
        if not self._cache.is_fresh(file_path, mtime):
            self._add_to_cache(file_path, blocks)

    def get_duplicate_hashes(self) -> list[int]:
        """Get all hash values with 2+ occurrences from memory.

        Returns:
            List of hash values that appear in multiple blocks
        """
        # Always query from in-memory store for this run's files
        return [h for h, blocks in self._memory_store.items() if len(blocks) >= 2]

    def get_blocks_for_hash(self, hash_value: int) -> list[CodeBlock]:
        """Get all blocks with given hash value from memory.

        Args:
            hash_value: Hash to search for

        Returns:
            List of code blocks with this hash
        """
        # Always query from in-memory store for this run's files
        return self._memory_store.get(hash_value, [])

    def _add_to_cache(self, file_path: Path, blocks: list[CodeBlock]) -> None:
        """Add blocks to SQLite cache, keyed by the file's mtime."""
        if not self._cache or not blocks:
            return

        try:
            mtime = file_path.stat().st_mtime
        except OSError:
            # File vanished between analysis and persistence; skip caching
            # (mirrors the OSError handling in _update_cache_if_fresh).
            return

        self._cache.save(file_path, mtime, blocks)

    def _add_to_memory(self, blocks: list[CodeBlock]) -> None:
        """Add blocks to in-memory store, grouped by hash value."""
        for block in blocks:
            self._memory_store.setdefault(block.hash_value, []).append(block)
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Purpose: File analysis orchestration for duplicate detection
|
|
3
|
+
|
|
4
|
+
Scope: Coordinates language-specific analyzers and cache checking
|
|
5
|
+
|
|
6
|
+
Overview: Orchestrates file analysis by delegating to language-specific analyzers (Python, TypeScript)
|
|
7
|
+
and checking cache freshness. Handles cache hits by loading from cache, and cache misses by
|
|
8
|
+
analyzing files. Separates file analysis orchestration from main linter rule logic to maintain
|
|
9
|
+
SRP compliance.
|
|
10
|
+
|
|
11
|
+
Dependencies: PythonDuplicateAnalyzer, TypeScriptDuplicateAnalyzer, DRYCache, DRYConfig, CodeBlock
|
|
12
|
+
|
|
13
|
+
Exports: FileAnalyzer class
|
|
14
|
+
|
|
15
|
+
Interfaces: FileAnalyzer.analyze_or_load(file_path, content, language, config, cache)
|
|
16
|
+
|
|
17
|
+
Implementation: Delegates to language-specific analyzers, checks cache freshness
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from dataclasses import dataclass
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
|
|
23
|
+
from .block_filter import BlockFilterRegistry, create_default_registry
|
|
24
|
+
from .cache import CodeBlock, DRYCache
|
|
25
|
+
from .config import DRYConfig
|
|
26
|
+
from .python_analyzer import PythonDuplicateAnalyzer
|
|
27
|
+
from .typescript_analyzer import TypeScriptDuplicateAnalyzer
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass
class FileAnalysisContext:
    """Bundle of inputs needed to analyze one file for duplicates.

    NOTE(review): not referenced elsewhere in this module — presumably
    consumed by external callers; confirm before removing.
    """

    file_path: Path  # Location of the file being analyzed
    content: str  # Full text of the file
    language: str  # Language identifier, e.g. "python" or "typescript"
    config: DRYConfig  # Active DRY linter configuration
    cache: DRYCache | None  # Optional SQLite cache (None disables caching)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class FileAnalyzer:
    """Orchestrates file analysis with cache support."""

    def __init__(self, config: DRYConfig | None = None) -> None:
        """Initialize with language-specific analyzers.

        Args:
            config: DRY configuration (used to configure filters)
        """
        # Create filter registry based on config
        filter_registry = self._create_filter_registry(config)

        # Initialize analyzers with filter registry
        self._python_analyzer = PythonDuplicateAnalyzer(filter_registry)
        # NOTE(review): the TypeScript analyzer is constructed without the
        # filter registry — confirm whether that asymmetry is intentional.
        self._typescript_analyzer = TypeScriptDuplicateAnalyzer()

    def _create_filter_registry(self, config: DRYConfig | None) -> BlockFilterRegistry:
        """Create filter registry based on configuration.

        Args:
            config: DRY configuration

        Returns:
            Configured BlockFilterRegistry
        """
        registry = create_default_registry()

        if not config:
            return registry

        # Apply per-filter enable/disable toggles from config.filters
        for filter_name, enabled in config.filters.items():
            if enabled:
                registry.enable_filter(filter_name)
            else:
                registry.disable_filter(filter_name)

        return registry

    def analyze_or_load(  # pylint: disable=too-many-arguments,too-many-positional-arguments
        self,
        file_path: Path,
        content: str,
        language: str,
        config: DRYConfig,
        cache: DRYCache | None = None,
    ) -> list[CodeBlock]:
        """Analyze file or load from cache.

        Args:
            file_path: Path to file
            content: File content
            language: File language
            config: DRY configuration
            cache: Optional cache instance

        Returns:
            List of CodeBlock instances
        """
        # Serve from cache when the stored entry is still fresh
        if cache:
            try:
                mtime = file_path.stat().st_mtime
            except OSError:
                # File is gone or unreadable on disk; fall through and
                # analyze the in-memory content instead of raising.
                mtime = None
            if mtime is not None and cache.is_fresh(file_path, mtime):
                return cache.load(file_path)

        # Analyze file based on language
        return self._analyze_file(file_path, content, language, config)

    def _analyze_file(
        self, file_path: Path, content: str, language: str, config: DRYConfig
    ) -> list[CodeBlock]:
        """Dispatch analysis to the language-specific analyzer.

        Args:
            file_path: Path to file
            content: File content
            language: File language
            config: DRY configuration

        Returns:
            List of CodeBlock instances (empty for unsupported languages)
        """
        if language == "python":
            return self._python_analyzer.analyze(file_path, content, config)
        if language in ("typescript", "javascript"):
            return self._typescript_analyzer.analyze(file_path, content, config)
        return []