thailint 0.4.3__tar.gz → 0.4.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {thailint-0.4.3 → thailint-0.4.5}/PKG-INFO +4 -2
- {thailint-0.4.3 → thailint-0.4.5}/pyproject.toml +1 -1
- {thailint-0.4.3 → thailint-0.4.5}/src/linters/dry/python_analyzer.py +176 -36
- {thailint-0.4.3 → thailint-0.4.5}/src/linters/dry/token_hasher.py +63 -9
- thailint-0.4.5/src/linters/file_header/__init__.py +24 -0
- thailint-0.4.5/src/linters/file_header/atemporal_detector.py +87 -0
- thailint-0.4.5/src/linters/file_header/config.py +66 -0
- thailint-0.4.5/src/linters/file_header/field_validator.py +69 -0
- thailint-0.4.5/src/linters/file_header/linter.py +313 -0
- thailint-0.4.5/src/linters/file_header/python_parser.py +86 -0
- thailint-0.4.5/src/linters/file_header/violation_builder.py +78 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/orchestrator/core.py +12 -2
- {thailint-0.4.3 → thailint-0.4.5}/CHANGELOG.md +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/LICENSE +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/README.md +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/__init__.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/analyzers/__init__.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/analyzers/typescript_base.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/api.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/cli.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/config.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/core/__init__.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/core/base.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/core/cli_utils.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/core/config_parser.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/core/linter_utils.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/core/registry.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/core/rule_discovery.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/core/types.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/core/violation_builder.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/linter_config/__init__.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/linter_config/ignore.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/linter_config/loader.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/linters/__init__.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/linters/dry/__init__.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/linters/dry/base_token_analyzer.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/linters/dry/block_filter.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/linters/dry/block_grouper.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/linters/dry/cache.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/linters/dry/cache_query.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/linters/dry/config.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/linters/dry/config_loader.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/linters/dry/deduplicator.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/linters/dry/duplicate_storage.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/linters/dry/file_analyzer.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/linters/dry/inline_ignore.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/linters/dry/linter.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/linters/dry/storage_initializer.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/linters/dry/typescript_analyzer.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/linters/dry/violation_builder.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/linters/dry/violation_filter.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/linters/dry/violation_generator.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/linters/file_placement/__init__.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/linters/file_placement/config_loader.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/linters/file_placement/directory_matcher.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/linters/file_placement/linter.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/linters/file_placement/path_resolver.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/linters/file_placement/pattern_matcher.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/linters/file_placement/pattern_validator.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/linters/file_placement/rule_checker.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/linters/file_placement/violation_factory.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/linters/magic_numbers/__init__.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/linters/magic_numbers/config.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/linters/magic_numbers/context_analyzer.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/linters/magic_numbers/linter.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/linters/magic_numbers/python_analyzer.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/linters/magic_numbers/typescript_analyzer.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/linters/magic_numbers/violation_builder.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/linters/nesting/__init__.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/linters/nesting/config.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/linters/nesting/linter.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/linters/nesting/python_analyzer.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/linters/nesting/typescript_analyzer.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/linters/nesting/typescript_function_extractor.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/linters/nesting/violation_builder.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/linters/srp/__init__.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/linters/srp/class_analyzer.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/linters/srp/config.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/linters/srp/heuristics.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/linters/srp/linter.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/linters/srp/metrics_evaluator.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/linters/srp/python_analyzer.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/linters/srp/typescript_analyzer.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/linters/srp/typescript_metrics_calculator.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/linters/srp/violation_builder.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/orchestrator/__init__.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/orchestrator/language_detector.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/templates/thailint_config_template.yaml +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/utils/__init__.py +0 -0
- {thailint-0.4.3 → thailint-0.4.5}/src/utils/project_root.py +0 -0
{thailint-0.4.3 → thailint-0.4.5}/PKG-INFO

```diff
@@ -1,8 +1,9 @@
-Metadata-Version: 2.
+Metadata-Version: 2.4
 Name: thailint
-Version: 0.4.3
+Version: 0.4.5
 Summary: The AI Linter - Enterprise-grade linting and governance for AI-generated code across multiple languages
 License: MIT
+License-File: LICENSE
 Keywords: linter,ai,code-quality,static-analysis,file-placement,governance,multi-language,cli,docker,python
 Author: Steve Jackson
 Requires-Python: >=3.11,<4.0
@@ -15,6 +16,7 @@ Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
+Classifier: Programming Language :: Python :: 3.14
 Classifier: Programming Language :: Python :: 3 :: Only
 Classifier: Topic :: Software Development :: Libraries :: Python Modules
 Classifier: Topic :: Software Development :: Quality Assurance
```
{thailint-0.4.3 → thailint-0.4.5}/pyproject.toml

```diff
@@ -17,7 +17,7 @@ build-backend = "poetry.core.masonry.api"

 [tool.poetry]
 name = "thailint"
-version = "0.4.3"
+version = "0.4.5"
 description = "The AI Linter - Enterprise-grade linting and governance for AI-generated code across multiple languages"
 authors = ["Steve Jackson"]
 license = "MIT"
```
{thailint-0.4.3 → thailint-0.4.5}/src/linters/dry/python_analyzer.py

```diff
@@ -62,8 +62,15 @@ class PythonDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.violat
         """
         super().__init__()
         self._filter_registry = filter_registry or create_default_registry()
-
-    def analyze(self, file_path: Path, content: str, config: DRYConfig) -> list[CodeBlock]:
+        # Performance optimization: Cache parsed AST to avoid re-parsing for each hash window
+        self._cached_ast: ast.Module | None = None
+        self._cached_content: str | None = None
+        # Performance optimization: Line-to-node index for O(1) lookups instead of O(n) ast.walk()
+        self._line_to_nodes: dict[int, list[ast.AST]] | None = None
+
+    def analyze(  # thailint: ignore[nesting.excessive-depth]
+        self, file_path: Path, content: str, config: DRYConfig
+    ) -> list[CodeBlock]:
         """Analyze Python file for duplicate code blocks, excluding docstrings.

         Args:
@@ -74,37 +81,73 @@ class PythonDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.violat
         Returns:
             List of CodeBlock instances with hash values
         """
-        #
-
+        # Performance optimization: Parse AST once and cache for _is_single_statement_in_source() calls
+        self._cached_ast = self._parse_content_safe(content)
+        self._cached_content = content
+
+        # Performance optimization: Build line-to-node index for O(1) lookups
+        self._line_to_nodes = self._build_line_to_node_index(self._cached_ast)
+
+        try:
+            # Get docstring line ranges
+            docstring_ranges = self._get_docstring_ranges_from_content(content)

-
-
+            # Tokenize with line number tracking
+            lines_with_numbers = self._tokenize_with_line_numbers(content, docstring_ranges)

-
-
+            # Generate rolling hash windows
+            windows = self._rolling_hash_with_tracking(
+                lines_with_numbers, config.min_duplicate_lines
+            )
+
+            return self._filter_valid_blocks(windows, file_path, content)
+        finally:
+            # Clear cache after analysis to avoid memory leaks
+            self._cached_ast = None
+            self._cached_content = None
+            self._line_to_nodes = None

+    def _filter_valid_blocks(
+        self,
+        windows: list[tuple[int, int, int, str]],
+        file_path: Path,
+        content: str,
+    ) -> list[CodeBlock]:
+        """Filter hash windows and create valid CodeBlock instances."""
         blocks = []
         for hash_val, start_line, end_line, snippet in windows:
-
-
-            if self._is_single_statement_in_source(content, start_line, end_line):
-                continue
-
-            block = CodeBlock(
-                file_path=file_path,
-                start_line=start_line,
-                end_line=end_line,
-                snippet=snippet,
-                hash_value=hash_val,
+            block = self._create_block_if_valid(
+                file_path, content, hash_val, start_line, end_line, snippet
             )
+            if block:
+                blocks.append(block)
+        return blocks

-
-
-
+    def _create_block_if_valid(  # pylint: disable=too-many-arguments,too-many-positional-arguments
+        self,
+        file_path: Path,
+        content: str,
+        hash_val: int,
+        start_line: int,
+        end_line: int,
+        snippet: str,
+    ) -> CodeBlock | None:
+        """Create CodeBlock if it passes all validation checks."""
+        if self._is_single_statement_in_source(content, start_line, end_line):
+            return None

-
+        block = CodeBlock(
+            file_path=file_path,
+            start_line=start_line,
+            end_line=end_line,
+            snippet=snippet,
+            hash_value=hash_val,
+        )

-
+        if self._filter_registry.should_filter_block(block, content):
+            return None
+
+        return block

     def _get_docstring_ranges_from_content(self, content: str) -> set[int]:
         """Extract line numbers that are part of docstrings.
@@ -172,20 +215,21 @@ class PythonDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.violat
         List of (original_line_number, normalized_code) tuples
         """
         lines_with_numbers = []
+        in_multiline_import = False

         for line_num, line in enumerate(content.split("\n"), start=1):
-            # Skip docstring lines
             if line_num in docstring_lines:
                 continue

-
-            line = self._hasher._strip_comments(line)  # pylint: disable=protected-access
-            line = " ".join(line.split())
-
+            line = self._hasher._normalize_line(line)  # pylint: disable=protected-access
             if not line:
                 continue

-            if
+            # Update multi-line import state and check if line should be skipped
+            in_multiline_import, should_skip = self._hasher._should_skip_import_line(  # pylint: disable=protected-access
+                line, in_multiline_import
+            )
+            if should_skip:
                 continue

             lines_with_numbers.append((line_num, line))
@@ -225,10 +269,20 @@ class PythonDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.violat
         return hashes

     def _is_single_statement_in_source(self, content: str, start_line: int, end_line: int) -> bool:
-        """Check if a line range in the original source is a single logical statement.
-
-        if
-
+        """Check if a line range in the original source is a single logical statement.
+
+        Performance optimization: Uses cached AST if available (set by analyze() method)
+        to avoid re-parsing the entire file for each hash window check.
+        """
+        # Use cached AST if available and content matches
+        tree: ast.Module | None
+        if self._cached_ast is not None and content == self._cached_content:
+            tree = self._cached_ast
+        else:
+            # Fallback: parse content (used by tests or standalone calls)
+            tree = self._parse_content_safe(content)
+        if tree is None:
+            return False

         return self._check_overlapping_nodes(tree, start_line, end_line)

@@ -240,13 +294,99 @@ class PythonDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.violat
         except SyntaxError:
             return None

+    @staticmethod
+    def _build_line_to_node_index(tree: ast.Module | None) -> dict[int, list[ast.AST]] | None:
+        """Build an index mapping each line number to overlapping AST nodes.
+
+        Performance optimization: This allows O(1) lookups instead of O(n) ast.walk() calls.
+        For a file with 5,144 nodes and 673 hash windows, this reduces 3.46M node operations
+        to just ~3,365 relevant node checks (99.9% reduction).
+
+        Args:
+            tree: Parsed AST tree (None if parsing failed)
+
+        Returns:
+            Dictionary mapping line numbers to list of AST nodes overlapping that line,
+            or None if tree is None
+        """
+        if tree is None:
+            return None
+
+        line_to_nodes: dict[int, list[ast.AST]] = {}
+        for node in ast.walk(tree):
+            if PythonDuplicateAnalyzer._node_has_line_info(node):
+                PythonDuplicateAnalyzer._add_node_to_index(node, line_to_nodes)
+
+        return line_to_nodes
+
+    @staticmethod
+    def _node_has_line_info(node: ast.AST) -> bool:
+        """Check if node has valid line number information."""
+        if not hasattr(node, "lineno") or not hasattr(node, "end_lineno"):
+            return False
+        return node.lineno is not None and node.end_lineno is not None
+
+    @staticmethod
+    def _add_node_to_index(node: ast.AST, line_to_nodes: dict[int, list[ast.AST]]) -> None:
+        """Add node to all lines it overlaps in the index."""
+        for line_num in range(node.lineno, node.end_lineno + 1):  # type: ignore[attr-defined]
+            if line_num not in line_to_nodes:
+                line_to_nodes[line_num] = []
+            line_to_nodes[line_num].append(node)
+
     def _check_overlapping_nodes(self, tree: ast.Module, start_line: int, end_line: int) -> bool:
-        """Check if any AST node overlaps and matches single-statement pattern.
+        """Check if any AST node overlaps and matches single-statement pattern.
+
+        Performance optimization: Use line-to-node index for O(1) lookups instead of O(n) ast.walk().
+        """
+        if self._line_to_nodes is not None:
+            return self._check_nodes_via_index(start_line, end_line)
+        return self._check_nodes_via_walk(tree, start_line, end_line)
+
+    def _check_nodes_via_index(self, start_line: int, end_line: int) -> bool:
+        """Check nodes using line-to-node index for O(1) lookups."""
+        candidates = self._collect_candidate_nodes_from_index(start_line, end_line)
+        return self._any_node_matches_pattern(candidates, start_line, end_line)
+
+    def _collect_candidate_nodes_from_index(self, start_line: int, end_line: int) -> set[ast.AST]:
+        """Collect unique nodes that overlap with the line range from index."""
+        candidate_nodes: set[ast.AST] = set()
+        for line_num in range(start_line, end_line + 1):
+            if self._line_to_nodes and line_num in self._line_to_nodes:
+                candidate_nodes.update(self._line_to_nodes[line_num])
+        return candidate_nodes
+
+    def _any_node_matches_pattern(
+        self, nodes: set[ast.AST], start_line: int, end_line: int
+    ) -> bool:
+        """Check if any node matches single-statement pattern."""
+        for node in nodes:
+            if self._is_single_statement_pattern(node, start_line, end_line):
+                return True
+        return False
+
+    def _check_nodes_via_walk(self, tree: ast.Module, start_line: int, end_line: int) -> bool:
+        """Check nodes using ast.walk() fallback for tests or standalone calls."""
         for node in ast.walk(tree):
-            if self.
+            if self._node_matches_via_walk(node, start_line, end_line):
                 return True
         return False

+    def _node_matches_via_walk(self, node: ast.AST, start_line: int, end_line: int) -> bool:
+        """Check if a single node overlaps and matches pattern."""
+        if not self._node_overlaps_range(node, start_line, end_line):
+            return False
+        return self._is_single_statement_pattern(node, start_line, end_line)
+
+    @staticmethod
+    def _node_overlaps_range(node: ast.AST, start_line: int, end_line: int) -> bool:
+        """Check if node overlaps with the given line range."""
+        if not hasattr(node, "lineno") or not hasattr(node, "end_lineno"):
+            return False
+        node_end = node.end_lineno
+        node_start = node.lineno
+        return not (node_end < start_line or node_start > end_line)
+
     def _node_overlaps_and_matches(self, node: ast.AST, start_line: int, end_line: int) -> bool:
         """Check if node overlaps with range and matches single-statement pattern."""
         if not hasattr(node, "lineno") or not hasattr(node, "end_lineno"):
```
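The bulk of the python_analyzer.py change is a caching layer: `analyze()` now parses the AST once, builds a line→node index, and `_check_overlapping_nodes()` consults that index instead of re-walking the tree for every hash window. Below is a minimal standalone sketch of the indexing idea, independent of thailint's classes; the sample source string is hypothetical.

```python
import ast


def build_line_to_node_index(tree: ast.Module) -> dict[int, list[ast.AST]]:
    """Walk the AST once, mapping each line number to the nodes spanning it."""
    index: dict[int, list[ast.AST]] = {}
    for node in ast.walk(tree):
        lineno = getattr(node, "lineno", None)
        end_lineno = getattr(node, "end_lineno", None)
        if lineno is None or end_lineno is None:
            continue  # e.g. the Module node has no line info
        for line in range(lineno, end_lineno + 1):
            index.setdefault(line, []).append(node)
    return index


def nodes_overlapping(index: dict[int, list[ast.AST]], start: int, end: int) -> set[ast.AST]:
    """O(window size) lookup instead of re-walking every node in the file."""
    found: set[ast.AST] = set()
    for line in range(start, end + 1):
        found.update(index.get(line, []))
    return found


source = "x = 1\nif x:\n    y = x + 1\n"  # hypothetical sample
index = build_line_to_node_index(ast.parse(source))
print(len(nodes_overlapping(index, 2, 3)))  # only nodes touching lines 2-3
```

The one-time walk is O(nodes × average span), after which each of the hundreds of hash-window checks touches only the handful of nodes on its own lines, which is where the "3.46M node operations → ~3,365 checks" figure in the diff's docstring comes from.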
{thailint-0.4.3 → thailint-0.4.5}/src/linters/dry/token_hasher.py

```diff
@@ -33,26 +33,80 @@ class TokenHasher:
         List of normalized code lines (non-empty, comments removed, imports filtered)
         """
         lines = []
+        in_multiline_import = False

         for line in code.split("\n"):
-
-            line = self._strip_comments(line)
-
-            # Normalize whitespace (collapse to single space)
-            line = " ".join(line.split())
-
-            # Skip empty lines
+            line = self._normalize_line(line)
             if not line:
                 continue

-            #
-
+            # Update multi-line import state and check if line should be skipped
+            in_multiline_import, should_skip = self._should_skip_import_line(
+                line, in_multiline_import
+            )
+            if should_skip:
                 continue

             lines.append(line)

         return lines

+    def _normalize_line(self, line: str) -> str:
+        """Normalize a line by removing comments and excess whitespace.
+
+        Args:
+            line: Raw source code line
+
+        Returns:
+            Normalized line (empty string if line has no content)
+        """
+        line = self._strip_comments(line)
+        return " ".join(line.split())
+
+    def _should_skip_import_line(self, line: str, in_multiline_import: bool) -> tuple[bool, bool]:
+        """Determine if an import line should be skipped.
+
+        Args:
+            line: Normalized code line
+            in_multiline_import: Whether we're currently inside a multi-line import
+
+        Returns:
+            Tuple of (new_in_multiline_import_state, should_skip_line)
+        """
+        if self._is_multiline_import_start(line):
+            return True, True
+
+        if in_multiline_import:
+            return self._handle_multiline_import_continuation(line)
+
+        if self._is_import_statement(line):
+            return False, True
+
+        return False, False
+
+    def _is_multiline_import_start(self, line: str) -> bool:
+        """Check if line starts a multi-line import statement.
+
+        Args:
+            line: Normalized code line
+
+        Returns:
+            True if line starts a multi-line import (has opening paren but no closing)
+        """
+        return self._is_import_statement(line) and "(" in line and ")" not in line
+
+    def _handle_multiline_import_continuation(self, line: str) -> tuple[bool, bool]:
+        """Handle a line that's part of a multi-line import.
+
+        Args:
+            line: Normalized code line inside a multi-line import
+
+        Returns:
+            Tuple of (still_in_import, should_skip)
+        """
+        closes_import = ")" in line
+        return not closes_import, True
+
     def _strip_comments(self, line: str) -> str:
         """Remove comments from line (Python # and // style).

```
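The token_hasher.py change threads a small state machine through the tokenizer loop: an import that opens a parenthesis without closing it switches on `in_multiline_import`, and every following line is skipped until one contains `)`. A self-contained sketch of that state machine follows; `is_import` below is a stand-in, since `_is_import_statement` is not shown in this diff.

```python
def is_import(line: str) -> bool:
    # Stand-in for TokenHasher._is_import_statement (not shown in this diff).
    return line.startswith(("import ", "from "))


def should_skip_import_line(line: str, in_multiline: bool) -> tuple[bool, bool]:
    """Return (new_in_multiline_state, skip_this_line), mirroring the diff's logic."""
    if is_import(line) and "(" in line and ")" not in line:
        return True, True             # opens a multi-line import
    if in_multiline:
        return ")" not in line, True  # stay inside until the paren closes; always skip
    if is_import(line):
        return False, True            # single-line import
    return False, False


state = False
for line in ["from os import (", "path,", ")", "x = 1"]:
    state, skip = should_skip_import_line(line, state)
    print(f"{line!r}: skip={skip}")
# Only 'x = 1' survives filtering; all import lines are dropped before hashing,
# so identical import blocks no longer register as duplicate code.
```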
thailint-0.4.5/src/linters/file_header/__init__.py (new file)

```diff
@@ -0,0 +1,24 @@
+"""
+File: src/linters/file_header/__init__.py
+Purpose: File header linter module initialization
+Exports: FileHeaderRule
+Depends: linter.FileHeaderRule
+Implements: Module-level exports for clean API
+Related: linter.py for main rule implementation
+
+Overview:
+    Initializes the file header linter module providing multi-language file header
+    validation with mandatory field checking, atemporal language detection, and configuration
+    support. Main entry point for file header linting functionality.
+
+Usage:
+    from src.linters.file_header import FileHeaderRule
+    rule = FileHeaderRule()
+    violations = rule.check(context)
+
+Notes: Follows standard Python module initialization pattern with __all__ export control
+"""
+
+from .linter import FileHeaderRule
+
+__all__ = ["FileHeaderRule"]
```
thailint-0.4.5/src/linters/file_header/atemporal_detector.py (new file)

```diff
@@ -0,0 +1,87 @@
+"""
+File: src/linters/file_header/atemporal_detector.py
+Purpose: Detects temporal language patterns in file headers
+Exports: AtemporalDetector class
+Depends: re module for regex matching
+Implements: Regex-based pattern matching with configurable patterns
+Related: linter.py for detector usage, violation_builder.py for violation creation
+
+Overview:
+    Implements pattern-based detection of temporal language that violates atemporal
+    documentation requirements. Detects dates, temporal qualifiers, state change language,
+    and future references using regex patterns. Provides violation details for each pattern match.
+
+Usage:
+    detector = AtemporalDetector()
+    violations = detector.detect_violations(header_text)
+
+Notes: Four pattern categories - dates, temporal qualifiers, state changes, future references
+"""
+
+import re
+
+
+class AtemporalDetector:
+    """Detects temporal language patterns in text."""
+
+    # Date patterns
+    DATE_PATTERNS = [
+        (r"\d{4}-\d{2}-\d{2}", "ISO date format (YYYY-MM-DD)"),
+        (
+            r"(?:January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{4}",
+            "Month Year format",
+        ),
+        (r"(?:Created|Updated|Modified):\s*\d{4}", "Date metadata"),
+    ]
+
+    # Temporal qualifiers
+    TEMPORAL_QUALIFIERS = [
+        (r"\bcurrently\b", 'temporal qualifier "currently"'),
+        (r"\bnow\b", 'temporal qualifier "now"'),
+        (r"\brecently\b", 'temporal qualifier "recently"'),
+        (r"\bsoon\b", 'temporal qualifier "soon"'),
+        (r"\bfor now\b", 'temporal qualifier "for now"'),
+    ]
+
+    # State change language
+    STATE_CHANGE = [
+        (r"\breplaces?\b", 'state change "replaces"'),
+        (r"\bmigrated from\b", 'state change "migrated from"'),
+        (r"\bformerly\b", 'state change "formerly"'),
+        (r"\bold implementation\b", 'state change "old"'),
+        (r"\bnew implementation\b", 'state change "new"'),
+    ]
+
+    # Future references
+    FUTURE_REFS = [
+        (r"\bwill be\b", 'future reference "will be"'),
+        (r"\bplanned\b", 'future reference "planned"'),
+        (r"\bto be added\b", 'future reference "to be added"'),
+        (r"\bcoming soon\b", 'future reference "coming soon"'),
+    ]
+
+    def detect_violations(  # thailint: ignore[nesting]
+        self, text: str
+    ) -> list[tuple[str, str, int]]:
+        """Detect all temporal language violations in text.
+
+        Args:
+            text: Text to check for temporal language
+
+        Returns:
+            List of (pattern, description, line_number) tuples for each violation
+        """
+        violations = []
+
+        # Check all pattern categories
+        all_patterns = (
+            self.DATE_PATTERNS + self.TEMPORAL_QUALIFIERS + self.STATE_CHANGE + self.FUTURE_REFS
+        )
+
+        lines = text.split("\n")
+        for line_num, line in enumerate(lines, start=1):
+            for pattern, description in all_patterns:
+                if re.search(pattern, line, re.IGNORECASE):
+                    violations.append((pattern, description, line_num))
+
+        return violations
```
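Assuming thailint 0.4.5 is importable under the `src` layout its headers describe, the detector can be exercised directly; every pattern is matched case-insensitively, line by line, and a single line can trigger several categories at once. The sample header below is hypothetical.

```python
from src.linters.file_header.atemporal_detector import AtemporalDetector

# Hypothetical header text that breaks the atemporal rules several ways
header = """Purpose: Parses config files
Overview: Currently replaces the old implementation.
Notes: TypeScript support will be added soon (planned for 2025-01-15).
"""

detector = AtemporalDetector()
for pattern, description, line_num in detector.detect_violations(header):
    print(f"line {line_num}: {description}")
# Expect hits for "currently", "replaces", and "old implementation" on line 2,
# and "will be", "soon", "planned", plus the ISO date on line 3.
```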
thailint-0.4.5/src/linters/file_header/config.py (new file)

```diff
@@ -0,0 +1,66 @@
+"""
+File: src/linters/file_header/config.py
+Purpose: Configuration model for file header linter
+Exports: FileHeaderConfig dataclass
+Depends: dataclasses, pathlib
+Implements: Configuration with validation and defaults
+Related: linter.py for configuration usage
+
+Overview:
+    Defines configuration structure for file header linter including required fields
+    per language, ignore patterns, and validation options. Provides defaults matching
+    ai-doc-standard.md requirements and supports loading from .thailint.yaml configuration.
+
+Usage:
+    config = FileHeaderConfig()
+    config = FileHeaderConfig.from_dict(config_dict, "python")
+
+Notes: Dataclass with validation and language-specific defaults
+"""
+
+from dataclasses import dataclass, field
+
+
+@dataclass
+class FileHeaderConfig:
+    """Configuration for file header linting."""
+
+    # Required fields by language
+    required_fields_python: list[str] = field(
+        default_factory=lambda: [
+            "Purpose",
+            "Scope",
+            "Overview",
+            "Dependencies",
+            "Exports",
+            "Interfaces",
+            "Implementation",
+        ]
+    )
+
+    # Enforce atemporal language checking
+    enforce_atemporal: bool = True
+
+    # Patterns to ignore (file paths)
+    ignore: list[str] = field(
+        default_factory=lambda: ["test/**", "**/migrations/**", "**/__init__.py"]
+    )
+
+    @classmethod
+    def from_dict(cls, config_dict: dict, language: str) -> "FileHeaderConfig":
+        """Create config from dictionary.
+
+        Args:
+            config_dict: Dictionary of configuration values
+            language: Programming language for language-specific config
+
+        Returns:
+            FileHeaderConfig instance with values from dictionary
+        """
+        return cls(
+            required_fields_python=config_dict.get("required_fields", {}).get(
+                "python", cls().required_fields_python
+            ),
+            enforce_atemporal=config_dict.get("enforce_atemporal", True),
+            ignore=config_dict.get("ignore", cls().ignore),
+        )
```
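`from_dict` maps naturally onto a parsed configuration block; the dict shape below mirrors what the method reads, though the exact `.thailint.yaml` key layout is an assumption here, since the loader is not part of this diff. Note also that in this release the `language` argument is accepted but only the Python field list is wired up.

```python
from src.linters.file_header.config import FileHeaderConfig

# Hypothetical parsed YAML, e.g. a `file-header:` section of .thailint.yaml
raw = {
    "required_fields": {"python": ["Purpose", "Overview", "Exports"]},
    "enforce_atemporal": False,
    "ignore": ["test/**", "docs/**"],
}

config = FileHeaderConfig.from_dict(raw, "python")
print(config.required_fields_python)  # ['Purpose', 'Overview', 'Exports']
print(config.enforce_atemporal)       # False

# Missing keys fall back to the dataclass defaults
defaults = FileHeaderConfig.from_dict({}, "python")
print(defaults.required_fields_python[:3])  # ['Purpose', 'Scope', 'Overview']
```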
thailint-0.4.5/src/linters/file_header/field_validator.py (new file)

```diff
@@ -0,0 +1,69 @@
+"""
+File: src/linters/file_header/field_validator.py
+Purpose: Validates mandatory fields in file headers
+Exports: FieldValidator class
+Depends: FileHeaderConfig for field requirements
+Implements: Configuration-driven validation with field presence checking
+Related: linter.py for validator usage, config.py for configuration
+
+Overview:
+    Validates presence and quality of mandatory header fields. Checks that all
+    required fields are present, non-empty, and meet minimum content requirements.
+    Supports language-specific required fields and provides detailed violation messages.
+
+Usage:
+    validator = FieldValidator(config)
+    violations = validator.validate_fields(fields, "python")
+
+Notes: Language-specific field requirements defined in config
+"""
+
+from .config import FileHeaderConfig
+
+
+class FieldValidator:
+    """Validates mandatory fields in headers."""
+
+    def __init__(self, config: FileHeaderConfig):
+        """Initialize validator with configuration.
+
+        Args:
+            config: File header configuration with required fields
+        """
+        self.config = config
+
+    def validate_fields(  # thailint: ignore[nesting]
+        self, fields: dict[str, str], language: str
+    ) -> list[tuple[str, str]]:
+        """Validate all required fields are present.
+
+        Args:
+            fields: Dictionary of parsed header fields
+            language: File language (python, typescript, etc.)
+
+        Returns:
+            List of (field_name, error_message) tuples for missing/invalid fields
+        """
+        violations = []
+        required_fields = self._get_required_fields(language)
+
+        for field_name in required_fields:
+            if field_name not in fields:
+                violations.append((field_name, f"Missing mandatory field: {field_name}"))
+            elif not fields[field_name] or len(fields[field_name].strip()) == 0:
+                violations.append((field_name, f"Empty mandatory field: {field_name}"))
+
+        return violations
+
+    def _get_required_fields(self, language: str) -> list[str]:
+        """Get required fields for language.
+
+        Args:
+            language: Programming language
+
+        Returns:
+            List of required field names for the language
+        """
+        if language == "python":
+            return self.config.required_fields_python
+        return []  # Other languages in PR5
```
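Given a header parsed into a field→value dict (the shape that python_parser.py, also added in this release, would presumably produce), validation reduces to presence and non-emptiness checks against the configured field list. A hedged usage sketch with hypothetical field values:

```python
from src.linters.file_header.config import FileHeaderConfig
from src.linters.file_header.field_validator import FieldValidator

validator = FieldValidator(FileHeaderConfig())

# Hypothetical parsed header: two fields filled in, one whitespace-only, rest absent
fields = {"Purpose": "Validates headers", "Scope": "   ", "Overview": "Checks files"}

for field_name, message in validator.validate_fields(fields, "python"):
    print(message)
# Empty mandatory field: Scope
# Missing mandatory field: Dependencies
# ... and likewise for Exports, Interfaces, Implementation

# Non-Python files pass trivially until other languages land (per the PR5 note)
print(validator.validate_fields(fields, "typescript"))  # []
```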