thailint 0.4.4__tar.gz → 0.4.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {thailint-0.4.4 → thailint-0.4.5}/PKG-INFO +1 -1
- {thailint-0.4.4 → thailint-0.4.5}/pyproject.toml +1 -1
- {thailint-0.4.4 → thailint-0.4.5}/src/linters/dry/python_analyzer.py +148 -39
- {thailint-0.4.4 → thailint-0.4.5}/src/linters/dry/token_hasher.py +63 -9
- thailint-0.4.5/src/linters/file_header/__init__.py +24 -0
- thailint-0.4.5/src/linters/file_header/atemporal_detector.py +87 -0
- thailint-0.4.5/src/linters/file_header/config.py +66 -0
- thailint-0.4.5/src/linters/file_header/field_validator.py +69 -0
- thailint-0.4.5/src/linters/file_header/linter.py +313 -0
- thailint-0.4.5/src/linters/file_header/python_parser.py +86 -0
- thailint-0.4.5/src/linters/file_header/violation_builder.py +78 -0
- {thailint-0.4.4 → thailint-0.4.5}/CHANGELOG.md +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/LICENSE +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/README.md +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/__init__.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/analyzers/__init__.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/analyzers/typescript_base.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/api.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/cli.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/config.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/core/__init__.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/core/base.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/core/cli_utils.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/core/config_parser.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/core/linter_utils.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/core/registry.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/core/rule_discovery.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/core/types.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/core/violation_builder.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/linter_config/__init__.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/linter_config/ignore.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/linter_config/loader.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/linters/__init__.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/linters/dry/__init__.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/linters/dry/base_token_analyzer.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/linters/dry/block_filter.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/linters/dry/block_grouper.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/linters/dry/cache.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/linters/dry/cache_query.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/linters/dry/config.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/linters/dry/config_loader.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/linters/dry/deduplicator.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/linters/dry/duplicate_storage.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/linters/dry/file_analyzer.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/linters/dry/inline_ignore.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/linters/dry/linter.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/linters/dry/storage_initializer.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/linters/dry/typescript_analyzer.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/linters/dry/violation_builder.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/linters/dry/violation_filter.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/linters/dry/violation_generator.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/linters/file_placement/__init__.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/linters/file_placement/config_loader.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/linters/file_placement/directory_matcher.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/linters/file_placement/linter.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/linters/file_placement/path_resolver.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/linters/file_placement/pattern_matcher.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/linters/file_placement/pattern_validator.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/linters/file_placement/rule_checker.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/linters/file_placement/violation_factory.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/linters/magic_numbers/__init__.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/linters/magic_numbers/config.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/linters/magic_numbers/context_analyzer.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/linters/magic_numbers/linter.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/linters/magic_numbers/python_analyzer.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/linters/magic_numbers/typescript_analyzer.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/linters/magic_numbers/violation_builder.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/linters/nesting/__init__.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/linters/nesting/config.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/linters/nesting/linter.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/linters/nesting/python_analyzer.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/linters/nesting/typescript_analyzer.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/linters/nesting/typescript_function_extractor.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/linters/nesting/violation_builder.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/linters/srp/__init__.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/linters/srp/class_analyzer.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/linters/srp/config.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/linters/srp/heuristics.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/linters/srp/linter.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/linters/srp/metrics_evaluator.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/linters/srp/python_analyzer.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/linters/srp/typescript_analyzer.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/linters/srp/typescript_metrics_calculator.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/linters/srp/violation_builder.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/orchestrator/__init__.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/orchestrator/core.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/orchestrator/language_detector.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/templates/thailint_config_template.yaml +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/utils/__init__.py +0 -0
- {thailint-0.4.4 → thailint-0.4.5}/src/utils/project_root.py +0 -0
{thailint-0.4.4 → thailint-0.4.5}/pyproject.toml

@@ -17,7 +17,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "thailint"
-version = "0.4.4"
+version = "0.4.5"
 description = "The AI Linter - Enterprise-grade linting and governance for AI-generated code across multiple languages"
 authors = ["Steve Jackson"]
 license = "MIT"

{thailint-0.4.4 → thailint-0.4.5}/src/linters/dry/python_analyzer.py

@@ -65,8 +65,12 @@ class PythonDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.violat
         # Performance optimization: Cache parsed AST to avoid re-parsing for each hash window
         self._cached_ast: ast.Module | None = None
         self._cached_content: str | None = None
+        # Performance optimization: Line-to-node index for O(1) lookups instead of O(n) ast.walk()
+        self._line_to_nodes: dict[int, list[ast.AST]] | None = None
 
-    def analyze(self, file_path: Path, content: str, config: DRYConfig) -> list[CodeBlock]:
+    def analyze(  # thailint: ignore[nesting.excessive-depth]
+        self, file_path: Path, content: str, config: DRYConfig
+    ) -> list[CodeBlock]:
         """Analyze Python file for duplicate code blocks, excluding docstrings.
 
         Args:

@@ -81,6 +85,9 @@ class PythonDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.violat
         self._cached_ast = self._parse_content_safe(content)
         self._cached_content = content
 
+        # Performance optimization: Build line-to-node index for O(1) lookups
+        self._line_to_nodes = self._build_line_to_node_index(self._cached_ast)
+
         try:
             # Get docstring line ranges
             docstring_ranges = self._get_docstring_ranges_from_content(content)

@@ -89,34 +96,58 @@ class PythonDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.violat
             lines_with_numbers = self._tokenize_with_line_numbers(content, docstring_ranges)
 
             # Generate rolling hash windows
-            windows = self._rolling_hash_with_tracking(
-                lines_with_numbers, config.min_duplicate_lines
-            )
-            for hash_val, start_line, end_line, snippet in windows:
-                # Skip blocks that are single logical statements
-                # Check the original source code, not the normalized snippet
-                if self._is_single_statement_in_source(content, start_line, end_line):
-                    continue
-
-                block = CodeBlock(
-                    file_path=file_path,
-                    start_line=start_line,
-                    end_line=end_line,
-                    snippet=snippet,
-                    hash_value=hash_val,
-                )
-
-                # Apply extensible filters (keyword arguments, imports, etc.)
-                if self._filter_registry.should_filter_block(block, content):
-                    continue
-
-                blocks.append(block)
+            windows = self._rolling_hash_with_tracking(
+                lines_with_numbers, config.min_duplicate_lines
+            )
 
-            return blocks
+            return self._filter_valid_blocks(windows, file_path, content)
         finally:
             # Clear cache after analysis to avoid memory leaks
             self._cached_ast = None
             self._cached_content = None
+            self._line_to_nodes = None
+
+    def _filter_valid_blocks(
+        self,
+        windows: list[tuple[int, int, int, str]],
+        file_path: Path,
+        content: str,
+    ) -> list[CodeBlock]:
+        """Filter hash windows and create valid CodeBlock instances."""
+        blocks = []
+        for hash_val, start_line, end_line, snippet in windows:
+            block = self._create_block_if_valid(
+                file_path, content, hash_val, start_line, end_line, snippet
+            )
+            if block:
+                blocks.append(block)
+        return blocks
+
+    def _create_block_if_valid(  # pylint: disable=too-many-arguments,too-many-positional-arguments
+        self,
+        file_path: Path,
+        content: str,
+        hash_val: int,
+        start_line: int,
+        end_line: int,
+        snippet: str,
+    ) -> CodeBlock | None:
+        """Create CodeBlock if it passes all validation checks."""
+        if self._is_single_statement_in_source(content, start_line, end_line):
+            return None
+
+        block = CodeBlock(
+            file_path=file_path,
+            start_line=start_line,
+            end_line=end_line,
+            snippet=snippet,
+            hash_value=hash_val,
+        )
+
+        if self._filter_registry.should_filter_block(block, content):
+            return None
+
+        return block
 
     def _get_docstring_ranges_from_content(self, content: str) -> set[int]:
         """Extract line numbers that are part of docstrings.

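
The refactor above is an extract-method split: the old inline loop becomes `_filter_valid_blocks`, and each hash window either yields a validated `CodeBlock` or `None` from `_create_block_if_valid`. A minimal standalone sketch of that create-if-valid pattern; the `Block` type and the single-line rejection rule here are hypothetical stand-ins, not thailint's own types or checks:

from dataclasses import dataclass

@dataclass(frozen=True)
class Block:  # hypothetical stand-in for thailint's CodeBlock
    start_line: int
    end_line: int
    snippet: str

def create_block_if_valid(window: tuple[int, int, str]) -> Block | None:
    """Return a Block for the window, or None if any check rejects it."""
    start, end, snippet = window
    if end == start:  # stand-in validation rule: reject single-line windows
        return None
    return Block(start, end, snippet)

def filter_valid_blocks(windows: list[tuple[int, int, str]]) -> list[Block]:
    """Keep only windows that survive every validation check."""
    return [block for block in map(create_block_if_valid, windows) if block]

print(filter_valid_blocks([(1, 5, "a"), (7, 7, "b")]))
# -> [Block(start_line=1, end_line=5, snippet='a')]
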
@@ -184,20 +215,21 @@ class PythonDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.violat
             List of (original_line_number, normalized_code) tuples
         """
         lines_with_numbers = []
+        in_multiline_import = False
 
         for line_num, line in enumerate(content.split("\n"), start=1):
-            # Skip docstring lines
             if line_num in docstring_lines:
                 continue
 
-
-            line = self._hasher._strip_comments(line)  # pylint: disable=protected-access
-            line = " ".join(line.split())
-
+            line = self._hasher._normalize_line(line)  # pylint: disable=protected-access
             if not line:
                 continue
 
-            if
+            # Update multi-line import state and check if line should be skipped
+            in_multiline_import, should_skip = self._hasher._should_skip_import_line(  # pylint: disable=protected-access
+                line, in_multiline_import
+            )
+            if should_skip:
                 continue
 
             lines_with_numbers.append((line_num, line))

@@ -243,6 +275,7 @@ class PythonDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.violat
         to avoid re-parsing the entire file for each hash window check.
         """
         # Use cached AST if available and content matches
+        tree: ast.Module | None
         if self._cached_ast is not None and content == self._cached_content:
             tree = self._cached_ast
         else:

@@ -261,23 +294,99 @@ class PythonDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.violat
         except SyntaxError:
             return None
 
-
-
+    @staticmethod
+    def _build_line_to_node_index(tree: ast.Module | None) -> dict[int, list[ast.AST]] | None:
+        """Build an index mapping each line number to overlapping AST nodes.
+
+        Performance optimization: This allows O(1) lookups instead of O(n) ast.walk() calls.
+        For a file with 5,144 nodes and 673 hash windows, this reduces 3.46M node operations
+        to just ~3,365 relevant node checks (99.9% reduction).
 
-
+        Args:
+            tree: Parsed AST tree (None if parsing failed)
+
+        Returns:
+            Dictionary mapping line numbers to list of AST nodes overlapping that line,
+            or None if tree is None
         """
+        if tree is None:
+            return None
+
+        line_to_nodes: dict[int, list[ast.AST]] = {}
         for node in ast.walk(tree):
-
-
-
-
-                continue  # No overlap, skip expensive pattern matching
+            if PythonDuplicateAnalyzer._node_has_line_info(node):
+                PythonDuplicateAnalyzer._add_node_to_index(node, line_to_nodes)
+
+        return line_to_nodes
 
-
+    @staticmethod
+    def _node_has_line_info(node: ast.AST) -> bool:
+        """Check if node has valid line number information."""
+        if not hasattr(node, "lineno") or not hasattr(node, "end_lineno"):
+            return False
+        return node.lineno is not None and node.end_lineno is not None
+
+    @staticmethod
+    def _add_node_to_index(node: ast.AST, line_to_nodes: dict[int, list[ast.AST]]) -> None:
+        """Add node to all lines it overlaps in the index."""
+        for line_num in range(node.lineno, node.end_lineno + 1):  # type: ignore[attr-defined]
+            if line_num not in line_to_nodes:
+                line_to_nodes[line_num] = []
+            line_to_nodes[line_num].append(node)
+
+    def _check_overlapping_nodes(self, tree: ast.Module, start_line: int, end_line: int) -> bool:
+        """Check if any AST node overlaps and matches single-statement pattern.
+
+        Performance optimization: Use line-to-node index for O(1) lookups instead of O(n) ast.walk().
+        """
+        if self._line_to_nodes is not None:
+            return self._check_nodes_via_index(start_line, end_line)
+        return self._check_nodes_via_walk(tree, start_line, end_line)
+
+    def _check_nodes_via_index(self, start_line: int, end_line: int) -> bool:
+        """Check nodes using line-to-node index for O(1) lookups."""
+        candidates = self._collect_candidate_nodes_from_index(start_line, end_line)
+        return self._any_node_matches_pattern(candidates, start_line, end_line)
+
+    def _collect_candidate_nodes_from_index(self, start_line: int, end_line: int) -> set[ast.AST]:
+        """Collect unique nodes that overlap with the line range from index."""
+        candidate_nodes: set[ast.AST] = set()
+        for line_num in range(start_line, end_line + 1):
+            if self._line_to_nodes and line_num in self._line_to_nodes:
+                candidate_nodes.update(self._line_to_nodes[line_num])
+        return candidate_nodes
+
+    def _any_node_matches_pattern(
+        self, nodes: set[ast.AST], start_line: int, end_line: int
+    ) -> bool:
+        """Check if any node matches single-statement pattern."""
+        for node in nodes:
             if self._is_single_statement_pattern(node, start_line, end_line):
                 return True
         return False
 
+    def _check_nodes_via_walk(self, tree: ast.Module, start_line: int, end_line: int) -> bool:
+        """Check nodes using ast.walk() fallback for tests or standalone calls."""
+        for node in ast.walk(tree):
+            if self._node_matches_via_walk(node, start_line, end_line):
+                return True
+        return False
+
+    def _node_matches_via_walk(self, node: ast.AST, start_line: int, end_line: int) -> bool:
+        """Check if a single node overlaps and matches pattern."""
+        if not self._node_overlaps_range(node, start_line, end_line):
+            return False
+        return self._is_single_statement_pattern(node, start_line, end_line)
+
+    @staticmethod
+    def _node_overlaps_range(node: ast.AST, start_line: int, end_line: int) -> bool:
+        """Check if node overlaps with the given line range."""
+        if not hasattr(node, "lineno") or not hasattr(node, "end_lineno"):
+            return False
+        node_end = node.end_lineno
+        node_start = node.lineno
+        return not (node_end < start_line or node_start > end_line)
+
     def _node_overlaps_and_matches(self, node: ast.AST, start_line: int, end_line: int) -> bool:
         """Check if node overlaps with range and matches single-statement pattern."""
         if not hasattr(node, "lineno") or not hasattr(node, "end_lineno"):

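
The recurring theme in this file's changes is the line-to-node index: instead of re-walking the whole AST for every hash window (each `ast.walk` pass costs O(total nodes)), the analyzer builds a `dict[int, list[ast.AST]]` once and answers a line-range query by unioning the per-line buckets. A minimal sketch of that indexing idea, independent of thailint's classes; the sample source and the `nodes_in_range` helper are illustrative:

import ast

source = """\
def f(x):
    y = x + 1
    return y
"""

tree = ast.parse(source)

# Build the index once: line number -> AST nodes overlapping that line.
line_to_nodes: dict[int, list[ast.AST]] = {}
for node in ast.walk(tree):
    if getattr(node, "lineno", None) and getattr(node, "end_lineno", None):
        for line in range(node.lineno, node.end_lineno + 1):
            line_to_nodes.setdefault(line, []).append(node)

def nodes_in_range(start: int, end: int) -> set[ast.AST]:
    """Answer a range query by unioning per-line buckets (no tree walk)."""
    found: set[ast.AST] = set()
    for line in range(start, end + 1):
        found.update(line_to_nodes.get(line, []))
    return found

print({type(n).__name__ for n in nodes_in_range(2, 2)})
# e.g. {'FunctionDef', 'Assign', 'Name', 'BinOp', 'Constant'}
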
{thailint-0.4.4 → thailint-0.4.5}/src/linters/dry/token_hasher.py

@@ -33,26 +33,80 @@ class TokenHasher:
         List of normalized code lines (non-empty, comments removed, imports filtered)
         """
         lines = []
+        in_multiline_import = False
 
         for line in code.split("\n"):
-
-            line = self._strip_comments(line)
-
-            # Normalize whitespace (collapse to single space)
-            line = " ".join(line.split())
-
-            # Skip empty lines
+            line = self._normalize_line(line)
             if not line:
                 continue
 
-            #
-
+            # Update multi-line import state and check if line should be skipped
+            in_multiline_import, should_skip = self._should_skip_import_line(
+                line, in_multiline_import
+            )
+            if should_skip:
                 continue
 
             lines.append(line)
 
         return lines
 
+    def _normalize_line(self, line: str) -> str:
+        """Normalize a line by removing comments and excess whitespace.
+
+        Args:
+            line: Raw source code line
+
+        Returns:
+            Normalized line (empty string if line has no content)
+        """
+        line = self._strip_comments(line)
+        return " ".join(line.split())
+
+    def _should_skip_import_line(self, line: str, in_multiline_import: bool) -> tuple[bool, bool]:
+        """Determine if an import line should be skipped.
+
+        Args:
+            line: Normalized code line
+            in_multiline_import: Whether we're currently inside a multi-line import
+
+        Returns:
+            Tuple of (new_in_multiline_import_state, should_skip_line)
+        """
+        if self._is_multiline_import_start(line):
+            return True, True
+
+        if in_multiline_import:
+            return self._handle_multiline_import_continuation(line)
+
+        if self._is_import_statement(line):
+            return False, True
+
+        return False, False
+
+    def _is_multiline_import_start(self, line: str) -> bool:
+        """Check if line starts a multi-line import statement.
+
+        Args:
+            line: Normalized code line
+
+        Returns:
+            True if line starts a multi-line import (has opening paren but no closing)
+        """
+        return self._is_import_statement(line) and "(" in line and ")" not in line
+
+    def _handle_multiline_import_continuation(self, line: str) -> tuple[bool, bool]:
+        """Handle a line that's part of a multi-line import.
+
+        Args:
+            line: Normalized code line inside a multi-line import
+
+        Returns:
+            Tuple of (still_in_import, should_skip)
+        """
+        closes_import = ")" in line
+        return not closes_import, True
+
     def _strip_comments(self, line: str) -> str:
         """Remove comments from line (Python # and // style).
 

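
The new `_should_skip_import_line` is a small two-state machine threaded through the tokenize loop: an import line with an unclosed `(` switches the state on, every line inside the import is skipped, and a `)` switches it off. A minimal simulation of the same logic; the `is_import` helper is a hypothetical stand-in for the class's `_is_import_statement`:

def is_import(line: str) -> bool:
    # hypothetical stand-in for TokenHasher._is_import_statement
    return line.startswith(("import ", "from "))

def should_skip_import_line(line: str, in_multiline: bool) -> tuple[bool, bool]:
    """Return (new_multiline_state, skip_this_line), mirroring the diff's logic."""
    if is_import(line) and "(" in line and ")" not in line:
        return True, True               # opens a multi-line import
    if in_multiline:
        return ")" not in line, True    # skip until the closing paren
    if is_import(line):
        return False, True              # single-line import
    return False, False                 # ordinary code line

state = False
for line in ["from os import (", "path,", ")", "x = path.sep"]:
    state, skip = should_skip_import_line(line, state)
    print(f"{line!r:24} skip={skip}")
# only 'x = path.sep' survives the filter
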
thailint-0.4.5/src/linters/file_header/__init__.py

@@ -0,0 +1,24 @@
+"""
+File: src/linters/file_header/__init__.py
+Purpose: File header linter module initialization
+Exports: FileHeaderRule
+Depends: linter.FileHeaderRule
+Implements: Module-level exports for clean API
+Related: linter.py for main rule implementation
+
+Overview:
+    Initializes the file header linter module providing multi-language file header
+    validation with mandatory field checking, atemporal language detection, and configuration
+    support. Main entry point for file header linting functionality.
+
+Usage:
+    from src.linters.file_header import FileHeaderRule
+    rule = FileHeaderRule()
+    violations = rule.check(context)
+
+Notes: Follows standard Python module initialization pattern with __all__ export control
+"""
+
+from .linter import FileHeaderRule
+
+__all__ = ["FileHeaderRule"]

thailint-0.4.5/src/linters/file_header/atemporal_detector.py

@@ -0,0 +1,87 @@
+"""
+File: src/linters/file_header/atemporal_detector.py
+Purpose: Detects temporal language patterns in file headers
+Exports: AtemporalDetector class
+Depends: re module for regex matching
+Implements: Regex-based pattern matching with configurable patterns
+Related: linter.py for detector usage, violation_builder.py for violation creation
+
+Overview:
+    Implements pattern-based detection of temporal language that violates atemporal
+    documentation requirements. Detects dates, temporal qualifiers, state change language,
+    and future references using regex patterns. Provides violation details for each pattern match.
+
+Usage:
+    detector = AtemporalDetector()
+    violations = detector.detect_violations(header_text)
+
+Notes: Four pattern categories - dates, temporal qualifiers, state changes, future references
+"""
+
+import re
+
+
+class AtemporalDetector:
+    """Detects temporal language patterns in text."""
+
+    # Date patterns
+    DATE_PATTERNS = [
+        (r"\d{4}-\d{2}-\d{2}", "ISO date format (YYYY-MM-DD)"),
+        (
+            r"(?:January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{4}",
+            "Month Year format",
+        ),
+        (r"(?:Created|Updated|Modified):\s*\d{4}", "Date metadata"),
+    ]
+
+    # Temporal qualifiers
+    TEMPORAL_QUALIFIERS = [
+        (r"\bcurrently\b", 'temporal qualifier "currently"'),
+        (r"\bnow\b", 'temporal qualifier "now"'),
+        (r"\brecently\b", 'temporal qualifier "recently"'),
+        (r"\bsoon\b", 'temporal qualifier "soon"'),
+        (r"\bfor now\b", 'temporal qualifier "for now"'),
+    ]
+
+    # State change language
+    STATE_CHANGE = [
+        (r"\breplaces?\b", 'state change "replaces"'),
+        (r"\bmigrated from\b", 'state change "migrated from"'),
+        (r"\bformerly\b", 'state change "formerly"'),
+        (r"\bold implementation\b", 'state change "old"'),
+        (r"\bnew implementation\b", 'state change "new"'),
+    ]
+
+    # Future references
+    FUTURE_REFS = [
+        (r"\bwill be\b", 'future reference "will be"'),
+        (r"\bplanned\b", 'future reference "planned"'),
+        (r"\bto be added\b", 'future reference "to be added"'),
+        (r"\bcoming soon\b", 'future reference "coming soon"'),
+    ]
+
+    def detect_violations(  # thailint: ignore[nesting]
+        self, text: str
+    ) -> list[tuple[str, str, int]]:
+        """Detect all temporal language violations in text.
+
+        Args:
+            text: Text to check for temporal language
+
+        Returns:
+            List of (pattern, description, line_number) tuples for each violation
+        """
+        violations = []
+
+        # Check all pattern categories
+        all_patterns = (
+            self.DATE_PATTERNS + self.TEMPORAL_QUALIFIERS + self.STATE_CHANGE + self.FUTURE_REFS
+        )
+
+        lines = text.split("\n")
+        for line_num, line in enumerate(lines, start=1):
+            for pattern, description in all_patterns:
+                if re.search(pattern, line, re.IGNORECASE):
+                    violations.append((pattern, description, line_num))
+
+        return violations

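
Given the class above, a short illustrative run (the sample header text is invented for demonstration):

header = (
    "Purpose: Parses configuration files\n"
    "Overview: Currently supports YAML; JSON support will be added soon.\n"
)

detector = AtemporalDetector()
for pattern, description, line_num in detector.detect_violations(header):
    print(f"line {line_num}: {description}")
# line 2: temporal qualifier "currently"
# line 2: temporal qualifier "soon"
# line 2: future reference "will be"
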
thailint-0.4.5/src/linters/file_header/config.py

@@ -0,0 +1,66 @@
+"""
+File: src/linters/file_header/config.py
+Purpose: Configuration model for file header linter
+Exports: FileHeaderConfig dataclass
+Depends: dataclasses, pathlib
+Implements: Configuration with validation and defaults
+Related: linter.py for configuration usage
+
+Overview:
+    Defines configuration structure for file header linter including required fields
+    per language, ignore patterns, and validation options. Provides defaults matching
+    ai-doc-standard.md requirements and supports loading from .thailint.yaml configuration.
+
+Usage:
+    config = FileHeaderConfig()
+    config = FileHeaderConfig.from_dict(config_dict, "python")
+
+Notes: Dataclass with validation and language-specific defaults
+"""
+
+from dataclasses import dataclass, field
+
+
+@dataclass
+class FileHeaderConfig:
+    """Configuration for file header linting."""
+
+    # Required fields by language
+    required_fields_python: list[str] = field(
+        default_factory=lambda: [
+            "Purpose",
+            "Scope",
+            "Overview",
+            "Dependencies",
+            "Exports",
+            "Interfaces",
+            "Implementation",
+        ]
+    )
+
+    # Enforce atemporal language checking
+    enforce_atemporal: bool = True
+
+    # Patterns to ignore (file paths)
+    ignore: list[str] = field(
+        default_factory=lambda: ["test/**", "**/migrations/**", "**/__init__.py"]
+    )
+
+    @classmethod
+    def from_dict(cls, config_dict: dict, language: str) -> "FileHeaderConfig":
+        """Create config from dictionary.
+
+        Args:
+            config_dict: Dictionary of configuration values
+            language: Programming language for language-specific config
+
+        Returns:
+            FileHeaderConfig instance with values from dictionary
+        """
+        return cls(
+            required_fields_python=config_dict.get("required_fields", {}).get(
+                "python", cls().required_fields_python
+            ),
+            enforce_atemporal=config_dict.get("enforce_atemporal", True),
+            ignore=config_dict.get("ignore", cls().ignore),
+        )

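
Illustrative use of `from_dict` with a dictionary shaped like the nested `required_fields` mapping it reads; whether this exactly mirrors the `.thailint.yaml` key layout is an assumption:

raw = {
    "required_fields": {"python": ["Purpose", "Overview"]},
    "enforce_atemporal": False,
}

config = FileHeaderConfig.from_dict(raw, "python")
print(config.required_fields_python)  # ['Purpose', 'Overview']
print(config.enforce_atemporal)       # False
print(config.ignore)                  # defaults: ['test/**', '**/migrations/**', '**/__init__.py']
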
thailint-0.4.5/src/linters/file_header/field_validator.py

@@ -0,0 +1,69 @@
+"""
+File: src/linters/file_header/field_validator.py
+Purpose: Validates mandatory fields in file headers
+Exports: FieldValidator class
+Depends: FileHeaderConfig for field requirements
+Implements: Configuration-driven validation with field presence checking
+Related: linter.py for validator usage, config.py for configuration
+
+Overview:
+    Validates presence and quality of mandatory header fields. Checks that all
+    required fields are present, non-empty, and meet minimum content requirements.
+    Supports language-specific required fields and provides detailed violation messages.
+
+Usage:
+    validator = FieldValidator(config)
+    violations = validator.validate_fields(fields, "python")
+
+Notes: Language-specific field requirements defined in config
+"""
+
+from .config import FileHeaderConfig
+
+
+class FieldValidator:
+    """Validates mandatory fields in headers."""
+
+    def __init__(self, config: FileHeaderConfig):
+        """Initialize validator with configuration.
+
+        Args:
+            config: File header configuration with required fields
+        """
+        self.config = config
+
+    def validate_fields(  # thailint: ignore[nesting]
+        self, fields: dict[str, str], language: str
+    ) -> list[tuple[str, str]]:
+        """Validate all required fields are present.
+
+        Args:
+            fields: Dictionary of parsed header fields
+            language: File language (python, typescript, etc.)
+
+        Returns:
+            List of (field_name, error_message) tuples for missing/invalid fields
+        """
+        violations = []
+        required_fields = self._get_required_fields(language)
+
+        for field_name in required_fields:
+            if field_name not in fields:
+                violations.append((field_name, f"Missing mandatory field: {field_name}"))
+            elif not fields[field_name] or len(fields[field_name].strip()) == 0:
+                violations.append((field_name, f"Empty mandatory field: {field_name}"))
+
+        return violations
+
+    def _get_required_fields(self, language: str) -> list[str]:
+        """Get required fields for language.
+
+        Args:
+            language: Programming language
+
+        Returns:
+            List of required field names for the language
+        """
+        if language == "python":
+            return self.config.required_fields_python
+        return []  # Other languages in PR5