thailint 0.4.3__py3-none-any.whl → 0.4.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- src/linters/dry/python_analyzer.py +61 -30
- src/orchestrator/core.py +12 -2
- {thailint-0.4.3.dist-info → thailint-0.4.4.dist-info}/METADATA +4 -2
- {thailint-0.4.3.dist-info → thailint-0.4.4.dist-info}/RECORD +7 -7
- {thailint-0.4.3.dist-info → thailint-0.4.4.dist-info}/WHEEL +1 -1
- {thailint-0.4.3.dist-info → thailint-0.4.4.dist-info}/entry_points.txt +0 -0
- {thailint-0.4.3.dist-info → thailint-0.4.4.dist-info/licenses}/LICENSE +0 -0
|
@@ -62,6 +62,9 @@ class PythonDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.violat
|
|
|
62
62
|
"""
|
|
63
63
|
super().__init__()
|
|
64
64
|
self._filter_registry = filter_registry or create_default_registry()
|
|
65
|
+
# Performance optimization: Cache parsed AST to avoid re-parsing for each hash window
|
|
66
|
+
self._cached_ast: ast.Module | None = None
|
|
67
|
+
self._cached_content: str | None = None
|
|
65
68
|
|
|
66
69
|
def analyze(self, file_path: Path, content: str, config: DRYConfig) -> list[CodeBlock]:
|
|
67
70
|
"""Analyze Python file for duplicate code blocks, excluding docstrings.
|
|
@@ -74,37 +77,46 @@ class PythonDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.violat
|
|
|
74
77
|
Returns:
|
|
75
78
|
List of CodeBlock instances with hash values
|
|
76
79
|
"""
|
|
77
|
-
#
|
|
78
|
-
|
|
80
|
+
# Performance optimization: Parse AST once and cache for _is_single_statement_in_source() calls
|
|
81
|
+
self._cached_ast = self._parse_content_safe(content)
|
|
82
|
+
self._cached_content = content
|
|
79
83
|
|
|
80
|
-
|
|
81
|
-
|
|
84
|
+
try:
|
|
85
|
+
# Get docstring line ranges
|
|
86
|
+
docstring_ranges = self._get_docstring_ranges_from_content(content)
|
|
82
87
|
|
|
83
|
-
|
|
84
|
-
|
|
88
|
+
# Tokenize with line number tracking
|
|
89
|
+
lines_with_numbers = self._tokenize_with_line_numbers(content, docstring_ranges)
|
|
85
90
|
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
# Skip blocks that are single logical statements
|
|
89
|
-
# Check the original source code, not the normalized snippet
|
|
90
|
-
if self._is_single_statement_in_source(content, start_line, end_line):
|
|
91
|
-
continue
|
|
91
|
+
# Generate rolling hash windows
|
|
92
|
+
windows = self._rolling_hash_with_tracking(lines_with_numbers, config.min_duplicate_lines)
|
|
92
93
|
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
)
|
|
94
|
+
blocks = []
|
|
95
|
+
for hash_val, start_line, end_line, snippet in windows:
|
|
96
|
+
# Skip blocks that are single logical statements
|
|
97
|
+
# Check the original source code, not the normalized snippet
|
|
98
|
+
if self._is_single_statement_in_source(content, start_line, end_line):
|
|
99
|
+
continue
|
|
100
100
|
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
101
|
+
block = CodeBlock(
|
|
102
|
+
file_path=file_path,
|
|
103
|
+
start_line=start_line,
|
|
104
|
+
end_line=end_line,
|
|
105
|
+
snippet=snippet,
|
|
106
|
+
hash_value=hash_val,
|
|
107
|
+
)
|
|
108
|
+
|
|
109
|
+
# Apply extensible filters (keyword arguments, imports, etc.)
|
|
110
|
+
if self._filter_registry.should_filter_block(block, content):
|
|
111
|
+
continue
|
|
104
112
|
|
|
105
|
-
|
|
113
|
+
blocks.append(block)
|
|
106
114
|
|
|
107
|
-
|
|
115
|
+
return blocks
|
|
116
|
+
finally:
|
|
117
|
+
# Clear cache after analysis to avoid memory leaks
|
|
118
|
+
self._cached_ast = None
|
|
119
|
+
self._cached_content = None
|
|
108
120
|
|
|
109
121
|
def _get_docstring_ranges_from_content(self, content: str) -> set[int]:
|
|
110
122
|
"""Extract line numbers that are part of docstrings.
|
|
@@ -225,10 +237,19 @@ class PythonDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.violat
|
|
|
225
237
|
return hashes
|
|
226
238
|
|
|
227
239
|
def _is_single_statement_in_source(self, content: str, start_line: int, end_line: int) -> bool:
|
|
228
|
-
"""Check if a line range in the original source is a single logical statement."""
|
|
229
|
-
|
|
230
|
-
if
|
|
231
|
-
|
|
240
|
+
"""Check if a line range in the original source is a single logical statement.
|
|
241
|
+
|
|
242
|
+
Performance optimization: Uses cached AST if available (set by analyze() method)
|
|
243
|
+
to avoid re-parsing the entire file for each hash window check.
|
|
244
|
+
"""
|
|
245
|
+
# Use cached AST if available and content matches
|
|
246
|
+
if self._cached_ast is not None and content == self._cached_content:
|
|
247
|
+
tree = self._cached_ast
|
|
248
|
+
else:
|
|
249
|
+
# Fallback: parse content (used by tests or standalone calls)
|
|
250
|
+
tree = self._parse_content_safe(content)
|
|
251
|
+
if tree is None:
|
|
252
|
+
return False
|
|
232
253
|
|
|
233
254
|
return self._check_overlapping_nodes(tree, start_line, end_line)
|
|
234
255
|
|
|
@@ -241,9 +262,19 @@ class PythonDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.violat
|
|
|
241
262
|
return None
|
|
242
263
|
|
|
243
264
|
def _check_overlapping_nodes(self, tree: ast.Module, start_line: int, end_line: int) -> bool:
|
|
244
|
-
"""Check if any AST node overlaps and matches single-statement pattern."""
|
|
265
|
+
"""Check if any AST node overlaps and matches single-statement pattern.
|
|
266
|
+
|
|
267
|
+
Performance optimization: Pre-filter nodes by line range before expensive pattern checks.
|
|
268
|
+
"""
|
|
245
269
|
for node in ast.walk(tree):
|
|
246
|
-
|
|
270
|
+
# Quick line range check to skip nodes that don't overlap
|
|
271
|
+
if not hasattr(node, "lineno") or not hasattr(node, "end_lineno"):
|
|
272
|
+
continue
|
|
273
|
+
if node.end_lineno < start_line or node.lineno > end_line:
|
|
274
|
+
continue # No overlap, skip expensive pattern matching
|
|
275
|
+
|
|
276
|
+
# Node overlaps - check if it matches single-statement pattern
|
|
277
|
+
if self._is_single_statement_pattern(node, start_line, end_line):
|
|
247
278
|
return True
|
|
248
279
|
return False
|
|
249
280
|
|
src/orchestrator/core.py
CHANGED
|
@@ -101,8 +101,9 @@ class Orchestrator:
|
|
|
101
101
|
self.config_loader = LinterConfigLoader()
|
|
102
102
|
self.ignore_parser = IgnoreDirectiveParser(self.project_root)
|
|
103
103
|
|
|
104
|
-
#
|
|
105
|
-
|
|
104
|
+
# Performance optimization: Defer rule discovery until first file is linted
|
|
105
|
+
# This eliminates ~0.077s overhead for commands that don't need rules (--help, config, etc.)
|
|
106
|
+
self._rules_discovered = False
|
|
106
107
|
|
|
107
108
|
# Use provided config or load from project root
|
|
108
109
|
if config is not None:
|
|
@@ -208,6 +209,12 @@ class Orchestrator:
|
|
|
208
209
|
|
|
209
210
|
return violations
|
|
210
211
|
|
|
212
|
+
def _ensure_rules_discovered(self) -> None:
|
|
213
|
+
"""Ensure rules have been discovered and registered (lazy initialization)."""
|
|
214
|
+
if not self._rules_discovered:
|
|
215
|
+
self.registry.discover_rules("src.linters")
|
|
216
|
+
self._rules_discovered = True
|
|
217
|
+
|
|
211
218
|
def _get_rules_for_file(self, file_path: Path, language: str) -> list[BaseLintRule]:
|
|
212
219
|
"""Get rules applicable to this file.
|
|
213
220
|
|
|
@@ -218,6 +225,9 @@ class Orchestrator:
|
|
|
218
225
|
Returns:
|
|
219
226
|
List of rules to execute against this file.
|
|
220
227
|
"""
|
|
228
|
+
# Lazy initialization: discover rules on first lint operation
|
|
229
|
+
self._ensure_rules_discovered()
|
|
230
|
+
|
|
221
231
|
# For now, return all registered rules
|
|
222
232
|
# Future: filter by language, configuration, etc.
|
|
223
233
|
return self.registry.list_all()
|
|
@@ -1,8 +1,9 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: thailint
|
|
3
|
-
Version: 0.4.3
|
|
3
|
+
Version: 0.4.4
|
|
4
4
|
Summary: The AI Linter - Enterprise-grade linting and governance for AI-generated code across multiple languages
|
|
5
5
|
License: MIT
|
|
6
|
+
License-File: LICENSE
|
|
6
7
|
Keywords: linter,ai,code-quality,static-analysis,file-placement,governance,multi-language,cli,docker,python
|
|
7
8
|
Author: Steve Jackson
|
|
8
9
|
Requires-Python: >=3.11,<4.0
|
|
@@ -15,6 +16,7 @@ Classifier: Programming Language :: Python :: 3
|
|
|
15
16
|
Classifier: Programming Language :: Python :: 3.11
|
|
16
17
|
Classifier: Programming Language :: Python :: 3.12
|
|
17
18
|
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
18
20
|
Classifier: Programming Language :: Python :: 3 :: Only
|
|
19
21
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
20
22
|
Classifier: Topic :: Software Development :: Quality Assurance
|
|
@@ -30,7 +30,7 @@ src/linters/dry/duplicate_storage.py,sha256=3OxE2mtoWGAsNNrB8J2c-4JirLUoqZ9ptydO
|
|
|
30
30
|
src/linters/dry/file_analyzer.py,sha256=ufSQ85ddsGTqGnBHZNTdV_5DGfTpUmJOB58sIdJNV0I,2928
|
|
31
31
|
src/linters/dry/inline_ignore.py,sha256=ASfA-fp_1aPpkakN2e0T6qdTh8S7Jqj89ovxXJLmFlc,4439
|
|
32
32
|
src/linters/dry/linter.py,sha256=XMLwCgGrFX0l0dVUJs1jpsXOfgxeKKDbxOtN5h5Emhk,5835
|
|
33
|
-
src/linters/dry/python_analyzer.py,sha256=
|
|
33
|
+
src/linters/dry/python_analyzer.py,sha256=RoC_OD0UqI0j5HVEwSZWUZVyHDNUtywvxse0HRumLoI,22748
|
|
34
34
|
src/linters/dry/storage_initializer.py,sha256=ykMALFs4uMUrN0_skEwySDl_t5Dm_LGHllF0OxDhiUI,1366
|
|
35
35
|
src/linters/dry/token_hasher.py,sha256=mCFuP0FQFALyKghBgZHcspsoOxgT7C7ZkfspnhFA5U4,3609
|
|
36
36
|
src/linters/dry/typescript_analyzer.py,sha256=n1rsQYp7nuPhgErbG8hWawkywRz-iFGhrGlQXDrIa14,21494
|
|
@@ -71,13 +71,13 @@ src/linters/srp/typescript_analyzer.py,sha256=Wi0P_G1v5AnZYtMN3sNm1iHva84-8Kep2L
|
|
|
71
71
|
src/linters/srp/typescript_metrics_calculator.py,sha256=2VLRux_tf1Cw645wwTuol3Z5A6-mkl4cgyW34myy00Q,2728
|
|
72
72
|
src/linters/srp/violation_builder.py,sha256=jaIjVtRYWUTs1SVJVwd0FxCojo0DxhPzfhyfMKmAroM,3881
|
|
73
73
|
src/orchestrator/__init__.py,sha256=XXLDJq2oaB-TpP2Y97GRnde9EkITGuFCmuLrDfxI9nY,245
|
|
74
|
-
src/orchestrator/core.py,sha256=
|
|
74
|
+
src/orchestrator/core.py,sha256=z0YcwsK18uhlztIPi54ux3mOm8fHMREYJoudsJPhC0Q,8857
|
|
75
75
|
src/orchestrator/language_detector.py,sha256=rHyVMApit80NTTNyDH1ObD1usKD8LjGmH3DwqNAWYGc,2736
|
|
76
76
|
src/templates/thailint_config_template.yaml,sha256=u8WFv2coE4uqfgf_slw7xjo4kGYIowDm1RIgxsKQzrE,4275
|
|
77
77
|
src/utils/__init__.py,sha256=NiBtKeQ09Y3kuUzeN4O1JNfUIYPQDS2AP1l5ODq-Dec,125
|
|
78
78
|
src/utils/project_root.py,sha256=b3YTEGTa9RPcOeHn1IByMMWyRiUabfVlpnlektL0A0o,6156
|
|
79
|
-
thailint-0.4.
|
|
80
|
-
thailint-0.4.
|
|
81
|
-
thailint-0.4.
|
|
82
|
-
thailint-0.4.
|
|
83
|
-
thailint-0.4.
|
|
79
|
+
thailint-0.4.4.dist-info/METADATA,sha256=1cmCJ3Myhrt90V0qWw-gfDr6cVP6nRO5fOZmy-TGSVY,36717
|
|
80
|
+
thailint-0.4.4.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
|
|
81
|
+
thailint-0.4.4.dist-info/entry_points.txt,sha256=l7DQJgU18sVLDpSaXOXY3lLhnQHQIRrSJZTQjG1cEAk,62
|
|
82
|
+
thailint-0.4.4.dist-info/licenses/LICENSE,sha256=kxh1J0Sb62XvhNJ6MZsVNe8PqNVJ7LHRn_EWa-T3djw,1070
|
|
83
|
+
thailint-0.4.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|