thailint 0.4.4__tar.gz → 0.4.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. {thailint-0.4.4 → thailint-0.4.5}/PKG-INFO +1 -1
  2. {thailint-0.4.4 → thailint-0.4.5}/pyproject.toml +1 -1
  3. {thailint-0.4.4 → thailint-0.4.5}/src/linters/dry/python_analyzer.py +148 -39
  4. {thailint-0.4.4 → thailint-0.4.5}/src/linters/dry/token_hasher.py +63 -9
  5. thailint-0.4.5/src/linters/file_header/__init__.py +24 -0
  6. thailint-0.4.5/src/linters/file_header/atemporal_detector.py +87 -0
  7. thailint-0.4.5/src/linters/file_header/config.py +66 -0
  8. thailint-0.4.5/src/linters/file_header/field_validator.py +69 -0
  9. thailint-0.4.5/src/linters/file_header/linter.py +313 -0
  10. thailint-0.4.5/src/linters/file_header/python_parser.py +86 -0
  11. thailint-0.4.5/src/linters/file_header/violation_builder.py +78 -0
  12. {thailint-0.4.4 → thailint-0.4.5}/CHANGELOG.md +0 -0
  13. {thailint-0.4.4 → thailint-0.4.5}/LICENSE +0 -0
  14. {thailint-0.4.4 → thailint-0.4.5}/README.md +0 -0
  15. {thailint-0.4.4 → thailint-0.4.5}/src/__init__.py +0 -0
  16. {thailint-0.4.4 → thailint-0.4.5}/src/analyzers/__init__.py +0 -0
  17. {thailint-0.4.4 → thailint-0.4.5}/src/analyzers/typescript_base.py +0 -0
  18. {thailint-0.4.4 → thailint-0.4.5}/src/api.py +0 -0
  19. {thailint-0.4.4 → thailint-0.4.5}/src/cli.py +0 -0
  20. {thailint-0.4.4 → thailint-0.4.5}/src/config.py +0 -0
  21. {thailint-0.4.4 → thailint-0.4.5}/src/core/__init__.py +0 -0
  22. {thailint-0.4.4 → thailint-0.4.5}/src/core/base.py +0 -0
  23. {thailint-0.4.4 → thailint-0.4.5}/src/core/cli_utils.py +0 -0
  24. {thailint-0.4.4 → thailint-0.4.5}/src/core/config_parser.py +0 -0
  25. {thailint-0.4.4 → thailint-0.4.5}/src/core/linter_utils.py +0 -0
  26. {thailint-0.4.4 → thailint-0.4.5}/src/core/registry.py +0 -0
  27. {thailint-0.4.4 → thailint-0.4.5}/src/core/rule_discovery.py +0 -0
  28. {thailint-0.4.4 → thailint-0.4.5}/src/core/types.py +0 -0
  29. {thailint-0.4.4 → thailint-0.4.5}/src/core/violation_builder.py +0 -0
  30. {thailint-0.4.4 → thailint-0.4.5}/src/linter_config/__init__.py +0 -0
  31. {thailint-0.4.4 → thailint-0.4.5}/src/linter_config/ignore.py +0 -0
  32. {thailint-0.4.4 → thailint-0.4.5}/src/linter_config/loader.py +0 -0
  33. {thailint-0.4.4 → thailint-0.4.5}/src/linters/__init__.py +0 -0
  34. {thailint-0.4.4 → thailint-0.4.5}/src/linters/dry/__init__.py +0 -0
  35. {thailint-0.4.4 → thailint-0.4.5}/src/linters/dry/base_token_analyzer.py +0 -0
  36. {thailint-0.4.4 → thailint-0.4.5}/src/linters/dry/block_filter.py +0 -0
  37. {thailint-0.4.4 → thailint-0.4.5}/src/linters/dry/block_grouper.py +0 -0
  38. {thailint-0.4.4 → thailint-0.4.5}/src/linters/dry/cache.py +0 -0
  39. {thailint-0.4.4 → thailint-0.4.5}/src/linters/dry/cache_query.py +0 -0
  40. {thailint-0.4.4 → thailint-0.4.5}/src/linters/dry/config.py +0 -0
  41. {thailint-0.4.4 → thailint-0.4.5}/src/linters/dry/config_loader.py +0 -0
  42. {thailint-0.4.4 → thailint-0.4.5}/src/linters/dry/deduplicator.py +0 -0
  43. {thailint-0.4.4 → thailint-0.4.5}/src/linters/dry/duplicate_storage.py +0 -0
  44. {thailint-0.4.4 → thailint-0.4.5}/src/linters/dry/file_analyzer.py +0 -0
  45. {thailint-0.4.4 → thailint-0.4.5}/src/linters/dry/inline_ignore.py +0 -0
  46. {thailint-0.4.4 → thailint-0.4.5}/src/linters/dry/linter.py +0 -0
  47. {thailint-0.4.4 → thailint-0.4.5}/src/linters/dry/storage_initializer.py +0 -0
  48. {thailint-0.4.4 → thailint-0.4.5}/src/linters/dry/typescript_analyzer.py +0 -0
  49. {thailint-0.4.4 → thailint-0.4.5}/src/linters/dry/violation_builder.py +0 -0
  50. {thailint-0.4.4 → thailint-0.4.5}/src/linters/dry/violation_filter.py +0 -0
  51. {thailint-0.4.4 → thailint-0.4.5}/src/linters/dry/violation_generator.py +0 -0
  52. {thailint-0.4.4 → thailint-0.4.5}/src/linters/file_placement/__init__.py +0 -0
  53. {thailint-0.4.4 → thailint-0.4.5}/src/linters/file_placement/config_loader.py +0 -0
  54. {thailint-0.4.4 → thailint-0.4.5}/src/linters/file_placement/directory_matcher.py +0 -0
  55. {thailint-0.4.4 → thailint-0.4.5}/src/linters/file_placement/linter.py +0 -0
  56. {thailint-0.4.4 → thailint-0.4.5}/src/linters/file_placement/path_resolver.py +0 -0
  57. {thailint-0.4.4 → thailint-0.4.5}/src/linters/file_placement/pattern_matcher.py +0 -0
  58. {thailint-0.4.4 → thailint-0.4.5}/src/linters/file_placement/pattern_validator.py +0 -0
  59. {thailint-0.4.4 → thailint-0.4.5}/src/linters/file_placement/rule_checker.py +0 -0
  60. {thailint-0.4.4 → thailint-0.4.5}/src/linters/file_placement/violation_factory.py +0 -0
  61. {thailint-0.4.4 → thailint-0.4.5}/src/linters/magic_numbers/__init__.py +0 -0
  62. {thailint-0.4.4 → thailint-0.4.5}/src/linters/magic_numbers/config.py +0 -0
  63. {thailint-0.4.4 → thailint-0.4.5}/src/linters/magic_numbers/context_analyzer.py +0 -0
  64. {thailint-0.4.4 → thailint-0.4.5}/src/linters/magic_numbers/linter.py +0 -0
  65. {thailint-0.4.4 → thailint-0.4.5}/src/linters/magic_numbers/python_analyzer.py +0 -0
  66. {thailint-0.4.4 → thailint-0.4.5}/src/linters/magic_numbers/typescript_analyzer.py +0 -0
  67. {thailint-0.4.4 → thailint-0.4.5}/src/linters/magic_numbers/violation_builder.py +0 -0
  68. {thailint-0.4.4 → thailint-0.4.5}/src/linters/nesting/__init__.py +0 -0
  69. {thailint-0.4.4 → thailint-0.4.5}/src/linters/nesting/config.py +0 -0
  70. {thailint-0.4.4 → thailint-0.4.5}/src/linters/nesting/linter.py +0 -0
  71. {thailint-0.4.4 → thailint-0.4.5}/src/linters/nesting/python_analyzer.py +0 -0
  72. {thailint-0.4.4 → thailint-0.4.5}/src/linters/nesting/typescript_analyzer.py +0 -0
  73. {thailint-0.4.4 → thailint-0.4.5}/src/linters/nesting/typescript_function_extractor.py +0 -0
  74. {thailint-0.4.4 → thailint-0.4.5}/src/linters/nesting/violation_builder.py +0 -0
  75. {thailint-0.4.4 → thailint-0.4.5}/src/linters/srp/__init__.py +0 -0
  76. {thailint-0.4.4 → thailint-0.4.5}/src/linters/srp/class_analyzer.py +0 -0
  77. {thailint-0.4.4 → thailint-0.4.5}/src/linters/srp/config.py +0 -0
  78. {thailint-0.4.4 → thailint-0.4.5}/src/linters/srp/heuristics.py +0 -0
  79. {thailint-0.4.4 → thailint-0.4.5}/src/linters/srp/linter.py +0 -0
  80. {thailint-0.4.4 → thailint-0.4.5}/src/linters/srp/metrics_evaluator.py +0 -0
  81. {thailint-0.4.4 → thailint-0.4.5}/src/linters/srp/python_analyzer.py +0 -0
  82. {thailint-0.4.4 → thailint-0.4.5}/src/linters/srp/typescript_analyzer.py +0 -0
  83. {thailint-0.4.4 → thailint-0.4.5}/src/linters/srp/typescript_metrics_calculator.py +0 -0
  84. {thailint-0.4.4 → thailint-0.4.5}/src/linters/srp/violation_builder.py +0 -0
  85. {thailint-0.4.4 → thailint-0.4.5}/src/orchestrator/__init__.py +0 -0
  86. {thailint-0.4.4 → thailint-0.4.5}/src/orchestrator/core.py +0 -0
  87. {thailint-0.4.4 → thailint-0.4.5}/src/orchestrator/language_detector.py +0 -0
  88. {thailint-0.4.4 → thailint-0.4.5}/src/templates/thailint_config_template.yaml +0 -0
  89. {thailint-0.4.4 → thailint-0.4.5}/src/utils/__init__.py +0 -0
  90. {thailint-0.4.4 → thailint-0.4.5}/src/utils/project_root.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: thailint
3
- Version: 0.4.4
3
+ Version: 0.4.5
4
4
  Summary: The AI Linter - Enterprise-grade linting and governance for AI-generated code across multiple languages
5
5
  License: MIT
6
6
  License-File: LICENSE
@@ -17,7 +17,7 @@ build-backend = "poetry.core.masonry.api"
17
17
 
18
18
  [tool.poetry]
19
19
  name = "thailint"
20
- version = "0.4.4"
20
+ version = "0.4.5"
21
21
  description = "The AI Linter - Enterprise-grade linting and governance for AI-generated code across multiple languages"
22
22
  authors = ["Steve Jackson"]
23
23
  license = "MIT"
@@ -65,8 +65,12 @@ class PythonDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.violat
65
65
  # Performance optimization: Cache parsed AST to avoid re-parsing for each hash window
66
66
  self._cached_ast: ast.Module | None = None
67
67
  self._cached_content: str | None = None
68
+ # Performance optimization: Line-to-node index for O(1) lookups instead of O(n) ast.walk()
69
+ self._line_to_nodes: dict[int, list[ast.AST]] | None = None
68
70
 
69
- def analyze(self, file_path: Path, content: str, config: DRYConfig) -> list[CodeBlock]:
71
+ def analyze( # thailint: ignore[nesting.excessive-depth]
72
+ self, file_path: Path, content: str, config: DRYConfig
73
+ ) -> list[CodeBlock]:
70
74
  """Analyze Python file for duplicate code blocks, excluding docstrings.
71
75
 
72
76
  Args:
@@ -81,6 +85,9 @@ class PythonDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.violat
81
85
  self._cached_ast = self._parse_content_safe(content)
82
86
  self._cached_content = content
83
87
 
88
+ # Performance optimization: Build line-to-node index for O(1) lookups
89
+ self._line_to_nodes = self._build_line_to_node_index(self._cached_ast)
90
+
84
91
  try:
85
92
  # Get docstring line ranges
86
93
  docstring_ranges = self._get_docstring_ranges_from_content(content)
@@ -89,34 +96,58 @@ class PythonDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.violat
89
96
  lines_with_numbers = self._tokenize_with_line_numbers(content, docstring_ranges)
90
97
 
91
98
  # Generate rolling hash windows
92
- windows = self._rolling_hash_with_tracking(lines_with_numbers, config.min_duplicate_lines)
93
-
94
- blocks = []
95
- for hash_val, start_line, end_line, snippet in windows:
96
- # Skip blocks that are single logical statements
97
- # Check the original source code, not the normalized snippet
98
- if self._is_single_statement_in_source(content, start_line, end_line):
99
- continue
100
-
101
- block = CodeBlock(
102
- file_path=file_path,
103
- start_line=start_line,
104
- end_line=end_line,
105
- snippet=snippet,
106
- hash_value=hash_val,
107
- )
108
-
109
- # Apply extensible filters (keyword arguments, imports, etc.)
110
- if self._filter_registry.should_filter_block(block, content):
111
- continue
112
-
113
- blocks.append(block)
99
+ windows = self._rolling_hash_with_tracking(
100
+ lines_with_numbers, config.min_duplicate_lines
101
+ )
114
102
 
115
- return blocks
103
+ return self._filter_valid_blocks(windows, file_path, content)
116
104
  finally:
117
105
  # Clear cache after analysis to avoid memory leaks
118
106
  self._cached_ast = None
119
107
  self._cached_content = None
108
+ self._line_to_nodes = None
109
+
110
def _filter_valid_blocks(
    self,
    windows: list[tuple[int, int, int, str]],
    file_path: Path,
    content: str,
) -> list[CodeBlock]:
    """Turn hash windows into CodeBlock instances, dropping windows that fail validation.

    Args:
        windows: (hash_value, start_line, end_line, snippet) tuples from rolling hashing
        file_path: Path of the file being analyzed
        content: Full source text of the file

    Returns:
        CodeBlocks for every window that survives all validation checks
    """
    # Window tuple order matches _create_block_if_valid's trailing parameters.
    candidates = (
        self._create_block_if_valid(file_path, content, *window) for window in windows
    )
    return [candidate for candidate in candidates if candidate is not None]

def _create_block_if_valid(  # pylint: disable=too-many-arguments,too-many-positional-arguments
    self,
    file_path: Path,
    content: str,
    hash_val: int,
    start_line: int,
    end_line: int,
    snippet: str,
) -> CodeBlock | None:
    """Build a CodeBlock for the window, or return None if any validation check rejects it.

    Args:
        file_path: Path of the file being analyzed
        content: Full source text (checked, not the normalized snippet)
        hash_val: Rolling hash of the window
        start_line: First source line of the window
        end_line: Last source line of the window
        snippet: Normalized code snippet for the window

    Returns:
        A CodeBlock, or None when the window is a single statement or is filtered out
    """
    # Single logical statements spanning the range are never duplicates worth reporting.
    if self._is_single_statement_in_source(content, start_line, end_line):
        return None

    candidate = CodeBlock(
        file_path=file_path,
        start_line=start_line,
        end_line=end_line,
        snippet=snippet,
        hash_value=hash_val,
    )

    # Extensible filters (keyword arguments, imports, etc.) get the final say.
    if self._filter_registry.should_filter_block(candidate, content):
        return None
    return candidate
120
151
 
121
152
  def _get_docstring_ranges_from_content(self, content: str) -> set[int]:
122
153
  """Extract line numbers that are part of docstrings.
@@ -184,20 +215,21 @@ class PythonDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.violat
184
215
  List of (original_line_number, normalized_code) tuples
185
216
  """
186
217
  lines_with_numbers = []
218
+ in_multiline_import = False
187
219
 
188
220
  for line_num, line in enumerate(content.split("\n"), start=1):
189
- # Skip docstring lines
190
221
  if line_num in docstring_lines:
191
222
  continue
192
223
 
193
- # Use hasher's existing tokenization logic
194
- line = self._hasher._strip_comments(line) # pylint: disable=protected-access
195
- line = " ".join(line.split())
196
-
224
+ line = self._hasher._normalize_line(line) # pylint: disable=protected-access
197
225
  if not line:
198
226
  continue
199
227
 
200
- if self._hasher._is_import_statement(line): # pylint: disable=protected-access
228
+ # Update multi-line import state and check if line should be skipped
229
+ in_multiline_import, should_skip = self._hasher._should_skip_import_line( # pylint: disable=protected-access
230
+ line, in_multiline_import
231
+ )
232
+ if should_skip:
201
233
  continue
202
234
 
203
235
  lines_with_numbers.append((line_num, line))
@@ -243,6 +275,7 @@ class PythonDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.violat
243
275
  to avoid re-parsing the entire file for each hash window check.
244
276
  """
245
277
  # Use cached AST if available and content matches
278
+ tree: ast.Module | None
246
279
  if self._cached_ast is not None and content == self._cached_content:
247
280
  tree = self._cached_ast
248
281
  else:
@@ -261,23 +294,99 @@ class PythonDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.violat
261
294
  except SyntaxError:
262
295
  return None
263
296
 
264
- def _check_overlapping_nodes(self, tree: ast.Module, start_line: int, end_line: int) -> bool:
265
- """Check if any AST node overlaps and matches single-statement pattern.
297
@staticmethod
def _build_line_to_node_index(tree: ast.Module | None) -> dict[int, list[ast.AST]] | None:
    """Build an index mapping each line number to the AST nodes overlapping it.

    Performance optimization: allows O(1) per-line lookups instead of an
    O(n) ast.walk() over the whole module for every hash window. For a file
    with 5,144 nodes and 673 hash windows this reduces 3.46M node operations
    to roughly 3,365 relevant node checks (99.9% reduction).

    Args:
        tree: Parsed AST tree (None if parsing failed)

    Returns:
        Dictionary mapping line numbers to the list of AST nodes overlapping
        that line, or None if tree is None
    """
    if tree is None:
        return None

    line_to_nodes: dict[int, list[ast.AST]] = {}
    for node in ast.walk(tree):
        if PythonDuplicateAnalyzer._node_has_line_info(node):
            PythonDuplicateAnalyzer._add_node_to_index(node, line_to_nodes)

    return line_to_nodes

@staticmethod
def _node_has_line_info(node: ast.AST) -> bool:
    """Check if node carries usable start/end line numbers.

    Both attributes must exist and be non-None (some synthetic nodes lack them).
    """
    if not hasattr(node, "lineno") or not hasattr(node, "end_lineno"):
        return False
    return node.lineno is not None and node.end_lineno is not None

@staticmethod
def _add_node_to_index(node: ast.AST, line_to_nodes: dict[int, list[ast.AST]]) -> None:
    """Register node under every line it spans in the index.

    Caller guarantees (via _node_has_line_info) that lineno/end_lineno are set.
    """
    for line_num in range(node.lineno, node.end_lineno + 1):  # type: ignore[attr-defined]
        # setdefault replaces the original manual key-presence check.
        line_to_nodes.setdefault(line_num, []).append(node)
336
+
337
def _check_overlapping_nodes(self, tree: ast.Module, start_line: int, end_line: int) -> bool:
    """Return True if any AST node overlapping the range matches the single-statement pattern.

    Performance optimization: prefers the prebuilt line-to-node index (O(1)
    per line) and only walks the whole tree when no index is available.
    """
    if self._line_to_nodes is None:
        return self._check_nodes_via_walk(tree, start_line, end_line)
    return self._check_nodes_via_index(start_line, end_line)

def _check_nodes_via_index(self, start_line: int, end_line: int) -> bool:
    """Check pattern matches among nodes gathered from the line-to-node index."""
    overlapping = self._collect_candidate_nodes_from_index(start_line, end_line)
    return self._any_node_matches_pattern(overlapping, start_line, end_line)

def _collect_candidate_nodes_from_index(self, start_line: int, end_line: int) -> set[ast.AST]:
    """Gather the unique nodes that touch any line in [start_line, end_line]."""
    found: set[ast.AST] = set()
    index = self._line_to_nodes
    if not index:
        return found
    for line_num in range(start_line, end_line + 1):
        found.update(index.get(line_num, ()))
    return found

def _any_node_matches_pattern(
    self, nodes: set[ast.AST], start_line: int, end_line: int
) -> bool:
    """True if at least one candidate node matches the single-statement pattern."""
    return any(
        self._is_single_statement_pattern(node, start_line, end_line) for node in nodes
    )
280
367
 
368
def _check_nodes_via_walk(self, tree: ast.Module, start_line: int, end_line: int) -> bool:
    """Fallback path: walk the full tree (used by tests or standalone calls)."""
    return any(
        self._node_matches_via_walk(node, start_line, end_line) for node in ast.walk(tree)
    )

def _node_matches_via_walk(self, node: ast.AST, start_line: int, end_line: int) -> bool:
    """True if node both overlaps the range and matches the single-statement pattern."""
    # Overlap check first: it is cheap and prunes most nodes before pattern matching.
    return self._node_overlaps_range(
        node, start_line, end_line
    ) and self._is_single_statement_pattern(node, start_line, end_line)

@staticmethod
def _node_overlaps_range(node: ast.AST, start_line: int, end_line: int) -> bool:
    """True if the node's [lineno, end_lineno] span intersects [start_line, end_line]."""
    if not (hasattr(node, "lineno") and hasattr(node, "end_lineno")):
        return False
    # Two closed ranges intersect iff each starts no later than the other ends.
    return node.lineno <= end_line and node.end_lineno >= start_line
389
+
281
390
  def _node_overlaps_and_matches(self, node: ast.AST, start_line: int, end_line: int) -> bool:
282
391
  """Check if node overlaps with range and matches single-statement pattern."""
283
392
  if not hasattr(node, "lineno") or not hasattr(node, "end_lineno"):
@@ -33,26 +33,80 @@ class TokenHasher:
33
33
  List of normalized code lines (non-empty, comments removed, imports filtered)
34
34
  """
35
35
  lines = []
36
+ in_multiline_import = False
36
37
 
37
38
  for line in code.split("\n"):
38
- # Remove comments (language-specific logic can be added)
39
- line = self._strip_comments(line)
40
-
41
- # Normalize whitespace (collapse to single space)
42
- line = " ".join(line.split())
43
-
44
- # Skip empty lines
39
+ line = self._normalize_line(line)
45
40
  if not line:
46
41
  continue
47
42
 
48
- # Skip import statements (common false positive)
49
- if self._is_import_statement(line):
43
+ # Update multi-line import state and check if line should be skipped
44
+ in_multiline_import, should_skip = self._should_skip_import_line(
45
+ line, in_multiline_import
46
+ )
47
+ if should_skip:
50
48
  continue
51
49
 
52
50
  lines.append(line)
53
51
 
54
52
  return lines
55
53
 
54
def _normalize_line(self, line: str) -> str:
    """Strip comments and collapse whitespace runs to single spaces.

    Args:
        line: Raw source code line

    Returns:
        Normalized line (empty string if line has no content)
    """
    without_comments = self._strip_comments(line)
    return " ".join(without_comments.split())

def _should_skip_import_line(self, line: str, in_multiline_import: bool) -> tuple[bool, bool]:
    """Decide whether an already-normalized line is import noise to skip.

    Args:
        line: Normalized code line
        in_multiline_import: Whether we're currently inside a multi-line import

    Returns:
        Tuple of (new_in_multiline_import_state, should_skip_line)
    """
    # Opening a parenthesized import: skip it and enter multi-line state.
    if self._is_multiline_import_start(line):
        return True, True

    if in_multiline_import:
        return self._handle_multiline_import_continuation(line)

    # Single-line imports are skipped without changing state.
    if self._is_import_statement(line):
        return False, True
    return False, False

def _is_multiline_import_start(self, line: str) -> bool:
    """Check if line opens a multi-line import statement.

    Args:
        line: Normalized code line

    Returns:
        True if line starts a multi-line import (open paren without a close paren)
    """
    if not self._is_import_statement(line):
        return False
    return "(" in line and ")" not in line

def _handle_multiline_import_continuation(self, line: str) -> tuple[bool, bool]:
    """Process a line inside a multi-line import; the line is always skipped.

    Args:
        line: Normalized code line inside a multi-line import

    Returns:
        Tuple of (still_in_import, should_skip)
    """
    still_open = ")" not in line
    return still_open, True
109
+
56
110
  def _strip_comments(self, line: str) -> str:
57
111
  """Remove comments from line (Python # and // style).
58
112
 
@@ -0,0 +1,24 @@
1
+ """
2
+ File: src/linters/file_header/__init__.py
3
+ Purpose: File header linter module initialization
4
+ Exports: FileHeaderRule
5
+ Depends: linter.FileHeaderRule
6
+ Implements: Module-level exports for clean API
7
+ Related: linter.py for main rule implementation
8
+
9
+ Overview:
10
+ Initializes the file header linter module providing multi-language file header
11
+ validation with mandatory field checking, atemporal language detection, and configuration
12
+ support. Main entry point for file header linting functionality.
13
+
14
+ Usage:
15
+ from src.linters.file_header import FileHeaderRule
16
+ rule = FileHeaderRule()
17
+ violations = rule.check(context)
18
+
19
+ Notes: Follows standard Python module initialization pattern with __all__ export control
20
+ """
21
+
22
+ from .linter import FileHeaderRule
23
+
24
+ __all__ = ["FileHeaderRule"]
@@ -0,0 +1,87 @@
1
+ """
2
+ File: src/linters/file_header/atemporal_detector.py
3
+ Purpose: Detects temporal language patterns in file headers
4
+ Exports: AtemporalDetector class
5
+ Depends: re module for regex matching
6
+ Implements: Regex-based pattern matching with configurable patterns
7
+ Related: linter.py for detector usage, violation_builder.py for violation creation
8
+
9
+ Overview:
10
+ Implements pattern-based detection of temporal language that violates atemporal
11
+ documentation requirements. Detects dates, temporal qualifiers, state change language,
12
+ and future references using regex patterns. Provides violation details for each pattern match.
13
+
14
+ Usage:
15
+ detector = AtemporalDetector()
16
+ violations = detector.detect_violations(header_text)
17
+
18
+ Notes: Four pattern categories - dates, temporal qualifiers, state changes, future references
19
+ """
20
+
21
+ import re
22
+
23
+
24
class AtemporalDetector:
    """Detects temporal language patterns in text."""

    # Date patterns
    DATE_PATTERNS = [
        (r"\d{4}-\d{2}-\d{2}", "ISO date format (YYYY-MM-DD)"),
        (
            r"(?:January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{4}",
            "Month Year format",
        ),
        (r"(?:Created|Updated|Modified):\s*\d{4}", "Date metadata"),
    ]

    # Temporal qualifiers
    TEMPORAL_QUALIFIERS = [
        (r"\bcurrently\b", 'temporal qualifier "currently"'),
        (r"\bnow\b", 'temporal qualifier "now"'),
        (r"\brecently\b", 'temporal qualifier "recently"'),
        (r"\bsoon\b", 'temporal qualifier "soon"'),
        (r"\bfor now\b", 'temporal qualifier "for now"'),
    ]

    # State change language
    STATE_CHANGE = [
        (r"\breplaces?\b", 'state change "replaces"'),
        (r"\bmigrated from\b", 'state change "migrated from"'),
        (r"\bformerly\b", 'state change "formerly"'),
        (r"\bold implementation\b", 'state change "old"'),
        (r"\bnew implementation\b", 'state change "new"'),
    ]

    # Future references
    FUTURE_REFS = [
        (r"\bwill be\b", 'future reference "will be"'),
        (r"\bplanned\b", 'future reference "planned"'),
        (r"\bto be added\b", 'future reference "to be added"'),
        (r"\bcoming soon\b", 'future reference "coming soon"'),
    ]

    def detect_violations(  # thailint: ignore[nesting]
        self, text: str
    ) -> list[tuple[str, str, int]]:
        """Detect all temporal language violations in text.

        Args:
            text: Text to check for temporal language

        Returns:
            List of (pattern, description, line_number) tuples for each violation
        """
        # All four categories are checked in declaration order on every line.
        all_patterns = (
            self.DATE_PATTERNS + self.TEMPORAL_QUALIFIERS + self.STATE_CHANGE + self.FUTURE_REFS
        )
        return [
            (pattern, description, line_num)
            for line_num, line in enumerate(text.split("\n"), start=1)
            for pattern, description in all_patterns
            if re.search(pattern, line, re.IGNORECASE)
        ]
@@ -0,0 +1,66 @@
1
+ """
2
+ File: src/linters/file_header/config.py
3
+ Purpose: Configuration model for file header linter
4
+ Exports: FileHeaderConfig dataclass
5
+ Depends: dataclasses, pathlib
6
+ Implements: Configuration with validation and defaults
7
+ Related: linter.py for configuration usage
8
+
9
+ Overview:
10
+ Defines configuration structure for file header linter including required fields
11
+ per language, ignore patterns, and validation options. Provides defaults matching
12
+ ai-doc-standard.md requirements and supports loading from .thailint.yaml configuration.
13
+
14
+ Usage:
15
+ config = FileHeaderConfig()
16
+ config = FileHeaderConfig.from_dict(config_dict, "python")
17
+
18
+ Notes: Dataclass with validation and language-specific defaults
19
+ """
20
+
21
+ from dataclasses import dataclass, field
22
+
23
+
24
@dataclass
class FileHeaderConfig:
    """Configuration for file header linting."""

    # Required fields by language
    required_fields_python: list[str] = field(
        default_factory=lambda: [
            "Purpose",
            "Scope",
            "Overview",
            "Dependencies",
            "Exports",
            "Interfaces",
            "Implementation",
        ]
    )

    # Enforce atemporal language checking
    enforce_atemporal: bool = True

    # Patterns to ignore (file paths)
    ignore: list[str] = field(
        default_factory=lambda: ["test/**", "**/migrations/**", "**/__init__.py"]
    )

    @classmethod
    def from_dict(cls, config_dict: dict, language: str) -> "FileHeaderConfig":
        """Create config from dictionary.

        Args:
            config_dict: Dictionary of configuration values
            language: Programming language for language-specific config

        Returns:
            FileHeaderConfig instance with values from dictionary
        """
        # One defaults instance supplies every fallback value.
        defaults = cls()
        fields_by_language = config_dict.get("required_fields", {})
        return cls(
            required_fields_python=fields_by_language.get(
                "python", defaults.required_fields_python
            ),
            enforce_atemporal=config_dict.get("enforce_atemporal", True),
            ignore=config_dict.get("ignore", defaults.ignore),
        )
@@ -0,0 +1,69 @@
1
+ """
2
+ File: src/linters/file_header/field_validator.py
3
+ Purpose: Validates mandatory fields in file headers
4
+ Exports: FieldValidator class
5
+ Depends: FileHeaderConfig for field requirements
6
+ Implements: Configuration-driven validation with field presence checking
7
+ Related: linter.py for validator usage, config.py for configuration
8
+
9
+ Overview:
10
+ Validates presence and quality of mandatory header fields. Checks that all
11
+ required fields are present, non-empty, and meet minimum content requirements.
12
+ Supports language-specific required fields and provides detailed violation messages.
13
+
14
+ Usage:
15
+ validator = FieldValidator(config)
16
+ violations = validator.validate_fields(fields, "python")
17
+
18
+ Notes: Language-specific field requirements defined in config
19
+ """
20
+
21
+ from .config import FileHeaderConfig
22
+
23
+
24
class FieldValidator:
    """Validates mandatory fields in headers."""

    def __init__(self, config: FileHeaderConfig):
        """Initialize validator with configuration.

        Args:
            config: File header configuration with required fields
        """
        self.config = config

    def validate_fields(  # thailint: ignore[nesting]
        self, fields: dict[str, str], language: str
    ) -> list[tuple[str, str]]:
        """Validate all required fields are present.

        Args:
            fields: Dictionary of parsed header fields
            language: File language (python, typescript, etc.)

        Returns:
            List of (field_name, error_message) tuples for missing/invalid fields
        """
        violations = []
        for field_name in self._get_required_fields(language):
            if field_name not in fields:
                violations.append((field_name, f"Missing mandatory field: {field_name}"))
                continue
            # Whitespace-only content counts as empty.
            value = fields[field_name]
            if not value or not value.strip():
                violations.append((field_name, f"Empty mandatory field: {field_name}"))
        return violations

    def _get_required_fields(self, language: str) -> list[str]:
        """Get required fields for language.

        Args:
            language: Programming language

        Returns:
            List of required field names for the language
        """
        if language == "python":
            return self.config.required_fields_python
        return []  # Other languages in PR5