thailint-0.4.3.tar.gz → thailint-0.4.5.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. {thailint-0.4.3 → thailint-0.4.5}/PKG-INFO +4 -2
  2. {thailint-0.4.3 → thailint-0.4.5}/pyproject.toml +1 -1
  3. {thailint-0.4.3 → thailint-0.4.5}/src/linters/dry/python_analyzer.py +176 -36
  4. {thailint-0.4.3 → thailint-0.4.5}/src/linters/dry/token_hasher.py +63 -9
  5. thailint-0.4.5/src/linters/file_header/__init__.py +24 -0
  6. thailint-0.4.5/src/linters/file_header/atemporal_detector.py +87 -0
  7. thailint-0.4.5/src/linters/file_header/config.py +66 -0
  8. thailint-0.4.5/src/linters/file_header/field_validator.py +69 -0
  9. thailint-0.4.5/src/linters/file_header/linter.py +313 -0
  10. thailint-0.4.5/src/linters/file_header/python_parser.py +86 -0
  11. thailint-0.4.5/src/linters/file_header/violation_builder.py +78 -0
  12. {thailint-0.4.3 → thailint-0.4.5}/src/orchestrator/core.py +12 -2
  13. {thailint-0.4.3 → thailint-0.4.5}/CHANGELOG.md +0 -0
  14. {thailint-0.4.3 → thailint-0.4.5}/LICENSE +0 -0
  15. {thailint-0.4.3 → thailint-0.4.5}/README.md +0 -0
  16. {thailint-0.4.3 → thailint-0.4.5}/src/__init__.py +0 -0
  17. {thailint-0.4.3 → thailint-0.4.5}/src/analyzers/__init__.py +0 -0
  18. {thailint-0.4.3 → thailint-0.4.5}/src/analyzers/typescript_base.py +0 -0
  19. {thailint-0.4.3 → thailint-0.4.5}/src/api.py +0 -0
  20. {thailint-0.4.3 → thailint-0.4.5}/src/cli.py +0 -0
  21. {thailint-0.4.3 → thailint-0.4.5}/src/config.py +0 -0
  22. {thailint-0.4.3 → thailint-0.4.5}/src/core/__init__.py +0 -0
  23. {thailint-0.4.3 → thailint-0.4.5}/src/core/base.py +0 -0
  24. {thailint-0.4.3 → thailint-0.4.5}/src/core/cli_utils.py +0 -0
  25. {thailint-0.4.3 → thailint-0.4.5}/src/core/config_parser.py +0 -0
  26. {thailint-0.4.3 → thailint-0.4.5}/src/core/linter_utils.py +0 -0
  27. {thailint-0.4.3 → thailint-0.4.5}/src/core/registry.py +0 -0
  28. {thailint-0.4.3 → thailint-0.4.5}/src/core/rule_discovery.py +0 -0
  29. {thailint-0.4.3 → thailint-0.4.5}/src/core/types.py +0 -0
  30. {thailint-0.4.3 → thailint-0.4.5}/src/core/violation_builder.py +0 -0
  31. {thailint-0.4.3 → thailint-0.4.5}/src/linter_config/__init__.py +0 -0
  32. {thailint-0.4.3 → thailint-0.4.5}/src/linter_config/ignore.py +0 -0
  33. {thailint-0.4.3 → thailint-0.4.5}/src/linter_config/loader.py +0 -0
  34. {thailint-0.4.3 → thailint-0.4.5}/src/linters/__init__.py +0 -0
  35. {thailint-0.4.3 → thailint-0.4.5}/src/linters/dry/__init__.py +0 -0
  36. {thailint-0.4.3 → thailint-0.4.5}/src/linters/dry/base_token_analyzer.py +0 -0
  37. {thailint-0.4.3 → thailint-0.4.5}/src/linters/dry/block_filter.py +0 -0
  38. {thailint-0.4.3 → thailint-0.4.5}/src/linters/dry/block_grouper.py +0 -0
  39. {thailint-0.4.3 → thailint-0.4.5}/src/linters/dry/cache.py +0 -0
  40. {thailint-0.4.3 → thailint-0.4.5}/src/linters/dry/cache_query.py +0 -0
  41. {thailint-0.4.3 → thailint-0.4.5}/src/linters/dry/config.py +0 -0
  42. {thailint-0.4.3 → thailint-0.4.5}/src/linters/dry/config_loader.py +0 -0
  43. {thailint-0.4.3 → thailint-0.4.5}/src/linters/dry/deduplicator.py +0 -0
  44. {thailint-0.4.3 → thailint-0.4.5}/src/linters/dry/duplicate_storage.py +0 -0
  45. {thailint-0.4.3 → thailint-0.4.5}/src/linters/dry/file_analyzer.py +0 -0
  46. {thailint-0.4.3 → thailint-0.4.5}/src/linters/dry/inline_ignore.py +0 -0
  47. {thailint-0.4.3 → thailint-0.4.5}/src/linters/dry/linter.py +0 -0
  48. {thailint-0.4.3 → thailint-0.4.5}/src/linters/dry/storage_initializer.py +0 -0
  49. {thailint-0.4.3 → thailint-0.4.5}/src/linters/dry/typescript_analyzer.py +0 -0
  50. {thailint-0.4.3 → thailint-0.4.5}/src/linters/dry/violation_builder.py +0 -0
  51. {thailint-0.4.3 → thailint-0.4.5}/src/linters/dry/violation_filter.py +0 -0
  52. {thailint-0.4.3 → thailint-0.4.5}/src/linters/dry/violation_generator.py +0 -0
  53. {thailint-0.4.3 → thailint-0.4.5}/src/linters/file_placement/__init__.py +0 -0
  54. {thailint-0.4.3 → thailint-0.4.5}/src/linters/file_placement/config_loader.py +0 -0
  55. {thailint-0.4.3 → thailint-0.4.5}/src/linters/file_placement/directory_matcher.py +0 -0
  56. {thailint-0.4.3 → thailint-0.4.5}/src/linters/file_placement/linter.py +0 -0
  57. {thailint-0.4.3 → thailint-0.4.5}/src/linters/file_placement/path_resolver.py +0 -0
  58. {thailint-0.4.3 → thailint-0.4.5}/src/linters/file_placement/pattern_matcher.py +0 -0
  59. {thailint-0.4.3 → thailint-0.4.5}/src/linters/file_placement/pattern_validator.py +0 -0
  60. {thailint-0.4.3 → thailint-0.4.5}/src/linters/file_placement/rule_checker.py +0 -0
  61. {thailint-0.4.3 → thailint-0.4.5}/src/linters/file_placement/violation_factory.py +0 -0
  62. {thailint-0.4.3 → thailint-0.4.5}/src/linters/magic_numbers/__init__.py +0 -0
  63. {thailint-0.4.3 → thailint-0.4.5}/src/linters/magic_numbers/config.py +0 -0
  64. {thailint-0.4.3 → thailint-0.4.5}/src/linters/magic_numbers/context_analyzer.py +0 -0
  65. {thailint-0.4.3 → thailint-0.4.5}/src/linters/magic_numbers/linter.py +0 -0
  66. {thailint-0.4.3 → thailint-0.4.5}/src/linters/magic_numbers/python_analyzer.py +0 -0
  67. {thailint-0.4.3 → thailint-0.4.5}/src/linters/magic_numbers/typescript_analyzer.py +0 -0
  68. {thailint-0.4.3 → thailint-0.4.5}/src/linters/magic_numbers/violation_builder.py +0 -0
  69. {thailint-0.4.3 → thailint-0.4.5}/src/linters/nesting/__init__.py +0 -0
  70. {thailint-0.4.3 → thailint-0.4.5}/src/linters/nesting/config.py +0 -0
  71. {thailint-0.4.3 → thailint-0.4.5}/src/linters/nesting/linter.py +0 -0
  72. {thailint-0.4.3 → thailint-0.4.5}/src/linters/nesting/python_analyzer.py +0 -0
  73. {thailint-0.4.3 → thailint-0.4.5}/src/linters/nesting/typescript_analyzer.py +0 -0
  74. {thailint-0.4.3 → thailint-0.4.5}/src/linters/nesting/typescript_function_extractor.py +0 -0
  75. {thailint-0.4.3 → thailint-0.4.5}/src/linters/nesting/violation_builder.py +0 -0
  76. {thailint-0.4.3 → thailint-0.4.5}/src/linters/srp/__init__.py +0 -0
  77. {thailint-0.4.3 → thailint-0.4.5}/src/linters/srp/class_analyzer.py +0 -0
  78. {thailint-0.4.3 → thailint-0.4.5}/src/linters/srp/config.py +0 -0
  79. {thailint-0.4.3 → thailint-0.4.5}/src/linters/srp/heuristics.py +0 -0
  80. {thailint-0.4.3 → thailint-0.4.5}/src/linters/srp/linter.py +0 -0
  81. {thailint-0.4.3 → thailint-0.4.5}/src/linters/srp/metrics_evaluator.py +0 -0
  82. {thailint-0.4.3 → thailint-0.4.5}/src/linters/srp/python_analyzer.py +0 -0
  83. {thailint-0.4.3 → thailint-0.4.5}/src/linters/srp/typescript_analyzer.py +0 -0
  84. {thailint-0.4.3 → thailint-0.4.5}/src/linters/srp/typescript_metrics_calculator.py +0 -0
  85. {thailint-0.4.3 → thailint-0.4.5}/src/linters/srp/violation_builder.py +0 -0
  86. {thailint-0.4.3 → thailint-0.4.5}/src/orchestrator/__init__.py +0 -0
  87. {thailint-0.4.3 → thailint-0.4.5}/src/orchestrator/language_detector.py +0 -0
  88. {thailint-0.4.3 → thailint-0.4.5}/src/templates/thailint_config_template.yaml +0 -0
  89. {thailint-0.4.3 → thailint-0.4.5}/src/utils/__init__.py +0 -0
  90. {thailint-0.4.3 → thailint-0.4.5}/src/utils/project_root.py +0 -0
--- thailint-0.4.3/PKG-INFO
+++ thailint-0.4.5/PKG-INFO
@@ -1,8 +1,9 @@
-Metadata-Version: 2.3
+Metadata-Version: 2.4
 Name: thailint
-Version: 0.4.3
+Version: 0.4.5
 Summary: The AI Linter - Enterprise-grade linting and governance for AI-generated code across multiple languages
 License: MIT
+License-File: LICENSE
 Keywords: linter,ai,code-quality,static-analysis,file-placement,governance,multi-language,cli,docker,python
 Author: Steve Jackson
 Requires-Python: >=3.11,<4.0
@@ -15,6 +16,7 @@ Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
+Classifier: Programming Language :: Python :: 3.14
 Classifier: Programming Language :: Python :: 3 :: Only
 Classifier: Topic :: Software Development :: Libraries :: Python Modules
 Classifier: Topic :: Software Development :: Quality Assurance
--- thailint-0.4.3/pyproject.toml
+++ thailint-0.4.5/pyproject.toml
@@ -17,7 +17,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "thailint"
-version = "0.4.3"
+version = "0.4.5"
 description = "The AI Linter - Enterprise-grade linting and governance for AI-generated code across multiple languages"
 authors = ["Steve Jackson"]
 license = "MIT"
--- thailint-0.4.3/src/linters/dry/python_analyzer.py
+++ thailint-0.4.5/src/linters/dry/python_analyzer.py
@@ -62,8 +62,15 @@ class PythonDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.violat
         """
         super().__init__()
         self._filter_registry = filter_registry or create_default_registry()
-
-    def analyze(self, file_path: Path, content: str, config: DRYConfig) -> list[CodeBlock]:
+        # Performance optimization: Cache parsed AST to avoid re-parsing for each hash window
+        self._cached_ast: ast.Module | None = None
+        self._cached_content: str | None = None
+        # Performance optimization: Line-to-node index for O(1) lookups instead of O(n) ast.walk()
+        self._line_to_nodes: dict[int, list[ast.AST]] | None = None
+
+    def analyze(  # thailint: ignore[nesting.excessive-depth]
+        self, file_path: Path, content: str, config: DRYConfig
+    ) -> list[CodeBlock]:
         """Analyze Python file for duplicate code blocks, excluding docstrings.
 
         Args:
@@ -74,37 +81,73 @@ class PythonDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.violat
         Returns:
             List of CodeBlock instances with hash values
         """
-        # Get docstring line ranges
-        docstring_ranges = self._get_docstring_ranges_from_content(content)
+        # Performance optimization: Parse AST once and cache for _is_single_statement_in_source() calls
+        self._cached_ast = self._parse_content_safe(content)
+        self._cached_content = content
+
+        # Performance optimization: Build line-to-node index for O(1) lookups
+        self._line_to_nodes = self._build_line_to_node_index(self._cached_ast)
+
+        try:
+            # Get docstring line ranges
+            docstring_ranges = self._get_docstring_ranges_from_content(content)
 
-        # Tokenize with line number tracking
-        lines_with_numbers = self._tokenize_with_line_numbers(content, docstring_ranges)
+            # Tokenize with line number tracking
+            lines_with_numbers = self._tokenize_with_line_numbers(content, docstring_ranges)
 
-        # Generate rolling hash windows
-        windows = self._rolling_hash_with_tracking(lines_with_numbers, config.min_duplicate_lines)
+            # Generate rolling hash windows
+            windows = self._rolling_hash_with_tracking(
+                lines_with_numbers, config.min_duplicate_lines
+            )
+
+            return self._filter_valid_blocks(windows, file_path, content)
+        finally:
+            # Clear cache after analysis to avoid memory leaks
+            self._cached_ast = None
+            self._cached_content = None
+            self._line_to_nodes = None
 
+    def _filter_valid_blocks(
+        self,
+        windows: list[tuple[int, int, int, str]],
+        file_path: Path,
+        content: str,
+    ) -> list[CodeBlock]:
+        """Filter hash windows and create valid CodeBlock instances."""
         blocks = []
         for hash_val, start_line, end_line, snippet in windows:
-            # Skip blocks that are single logical statements
-            # Check the original source code, not the normalized snippet
-            if self._is_single_statement_in_source(content, start_line, end_line):
-                continue
-
-            block = CodeBlock(
-                file_path=file_path,
-                start_line=start_line,
-                end_line=end_line,
-                snippet=snippet,
-                hash_value=hash_val,
+            block = self._create_block_if_valid(
+                file_path, content, hash_val, start_line, end_line, snippet
             )
+            if block:
+                blocks.append(block)
+        return blocks
 
-            # Apply extensible filters (keyword arguments, imports, etc.)
-            if self._filter_registry.should_filter_block(block, content):
-                continue
+    def _create_block_if_valid(  # pylint: disable=too-many-arguments,too-many-positional-arguments
+        self,
+        file_path: Path,
+        content: str,
+        hash_val: int,
+        start_line: int,
+        end_line: int,
+        snippet: str,
+    ) -> CodeBlock | None:
+        """Create CodeBlock if it passes all validation checks."""
+        if self._is_single_statement_in_source(content, start_line, end_line):
+            return None
 
-            blocks.append(block)
+        block = CodeBlock(
+            file_path=file_path,
+            start_line=start_line,
+            end_line=end_line,
+            snippet=snippet,
+            hash_value=hash_val,
+        )
 
-        return blocks
+        if self._filter_registry.should_filter_block(block, content):
+            return None
+
+        return block
 
     def _get_docstring_ranges_from_content(self, content: str) -> set[int]:
         """Extract line numbers that are part of docstrings.
@@ -172,20 +215,21 @@ class PythonDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.violat
             List of (original_line_number, normalized_code) tuples
         """
         lines_with_numbers = []
+        in_multiline_import = False
 
         for line_num, line in enumerate(content.split("\n"), start=1):
-            # Skip docstring lines
             if line_num in docstring_lines:
                 continue
 
-            # Use hasher's existing tokenization logic
-            line = self._hasher._strip_comments(line)  # pylint: disable=protected-access
-            line = " ".join(line.split())
-
+            line = self._hasher._normalize_line(line)  # pylint: disable=protected-access
             if not line:
                 continue
 
-            if self._hasher._is_import_statement(line):  # pylint: disable=protected-access
+            # Update multi-line import state and check if line should be skipped
+            in_multiline_import, should_skip = self._hasher._should_skip_import_line(  # pylint: disable=protected-access
+                line, in_multiline_import
+            )
+            if should_skip:
                 continue
 
             lines_with_numbers.append((line_num, line))
@@ -225,10 +269,20 @@ class PythonDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.violat
         return hashes
 
     def _is_single_statement_in_source(self, content: str, start_line: int, end_line: int) -> bool:
-        """Check if a line range in the original source is a single logical statement."""
-        tree = self._parse_content_safe(content)
-        if tree is None:
-            return False
+        """Check if a line range in the original source is a single logical statement.
+
+        Performance optimization: Uses cached AST if available (set by analyze() method)
+        to avoid re-parsing the entire file for each hash window check.
+        """
+        # Use cached AST if available and content matches
+        tree: ast.Module | None
+        if self._cached_ast is not None and content == self._cached_content:
+            tree = self._cached_ast
+        else:
+            # Fallback: parse content (used by tests or standalone calls)
+            tree = self._parse_content_safe(content)
+            if tree is None:
+                return False
 
         return self._check_overlapping_nodes(tree, start_line, end_line)
 
@@ -240,13 +294,99 @@ class PythonDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.violat
         except SyntaxError:
             return None
 
+    @staticmethod
+    def _build_line_to_node_index(tree: ast.Module | None) -> dict[int, list[ast.AST]] | None:
+        """Build an index mapping each line number to overlapping AST nodes.
+
+        Performance optimization: This allows O(1) lookups instead of O(n) ast.walk() calls.
+        For a file with 5,144 nodes and 673 hash windows, this reduces 3.46M node operations
+        to just ~3,365 relevant node checks (99.9% reduction).
+
+        Args:
+            tree: Parsed AST tree (None if parsing failed)
+
+        Returns:
+            Dictionary mapping line numbers to list of AST nodes overlapping that line,
+            or None if tree is None
+        """
+        if tree is None:
+            return None
+
+        line_to_nodes: dict[int, list[ast.AST]] = {}
+        for node in ast.walk(tree):
+            if PythonDuplicateAnalyzer._node_has_line_info(node):
+                PythonDuplicateAnalyzer._add_node_to_index(node, line_to_nodes)
+
+        return line_to_nodes
+
+    @staticmethod
+    def _node_has_line_info(node: ast.AST) -> bool:
+        """Check if node has valid line number information."""
+        if not hasattr(node, "lineno") or not hasattr(node, "end_lineno"):
+            return False
+        return node.lineno is not None and node.end_lineno is not None
+
+    @staticmethod
+    def _add_node_to_index(node: ast.AST, line_to_nodes: dict[int, list[ast.AST]]) -> None:
+        """Add node to all lines it overlaps in the index."""
+        for line_num in range(node.lineno, node.end_lineno + 1):  # type: ignore[attr-defined]
+            if line_num not in line_to_nodes:
+                line_to_nodes[line_num] = []
+            line_to_nodes[line_num].append(node)
+
     def _check_overlapping_nodes(self, tree: ast.Module, start_line: int, end_line: int) -> bool:
-        """Check if any AST node overlaps and matches single-statement pattern."""
+        """Check if any AST node overlaps and matches single-statement pattern.
+
+        Performance optimization: Use line-to-node index for O(1) lookups instead of O(n) ast.walk().
+        """
+        if self._line_to_nodes is not None:
+            return self._check_nodes_via_index(start_line, end_line)
+        return self._check_nodes_via_walk(tree, start_line, end_line)
+
+    def _check_nodes_via_index(self, start_line: int, end_line: int) -> bool:
+        """Check nodes using line-to-node index for O(1) lookups."""
+        candidates = self._collect_candidate_nodes_from_index(start_line, end_line)
+        return self._any_node_matches_pattern(candidates, start_line, end_line)
+
+    def _collect_candidate_nodes_from_index(self, start_line: int, end_line: int) -> set[ast.AST]:
+        """Collect unique nodes that overlap with the line range from index."""
+        candidate_nodes: set[ast.AST] = set()
+        for line_num in range(start_line, end_line + 1):
+            if self._line_to_nodes and line_num in self._line_to_nodes:
+                candidate_nodes.update(self._line_to_nodes[line_num])
+        return candidate_nodes
+
+    def _any_node_matches_pattern(
+        self, nodes: set[ast.AST], start_line: int, end_line: int
+    ) -> bool:
+        """Check if any node matches single-statement pattern."""
+        for node in nodes:
+            if self._is_single_statement_pattern(node, start_line, end_line):
+                return True
+        return False
+
+    def _check_nodes_via_walk(self, tree: ast.Module, start_line: int, end_line: int) -> bool:
+        """Check nodes using ast.walk() fallback for tests or standalone calls."""
         for node in ast.walk(tree):
-            if self._node_overlaps_and_matches(node, start_line, end_line):
+            if self._node_matches_via_walk(node, start_line, end_line):
                 return True
         return False
 
+    def _node_matches_via_walk(self, node: ast.AST, start_line: int, end_line: int) -> bool:
+        """Check if a single node overlaps and matches pattern."""
+        if not self._node_overlaps_range(node, start_line, end_line):
+            return False
+        return self._is_single_statement_pattern(node, start_line, end_line)
+
+    @staticmethod
+    def _node_overlaps_range(node: ast.AST, start_line: int, end_line: int) -> bool:
+        """Check if node overlaps with the given line range."""
+        if not hasattr(node, "lineno") or not hasattr(node, "end_lineno"):
+            return False
+        node_end = node.end_lineno
+        node_start = node.lineno
+        return not (node_end < start_line or node_start > end_line)
+
     def _node_overlaps_and_matches(self, node: ast.AST, start_line: int, end_line: int) -> bool:
         """Check if node overlaps with range and matches single-statement pattern."""
         if not hasattr(node, "lineno") or not hasattr(node, "end_lineno"):
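
To make the indexing trick concrete: pay one up-front pass over the tree to bucket nodes by line, then answer each window query by unioning a handful of per-line buckets instead of walking every node again. The sketch below is a standalone illustration of that idea; `build_index` and `nodes_in_range` are hypothetical names, not thailint's API.

```python
import ast


def build_index(tree: ast.AST) -> dict[int, list[ast.AST]]:
    """Map each source line to the AST nodes that overlap it."""
    index: dict[int, list[ast.AST]] = {}
    for node in ast.walk(tree):
        start = getattr(node, "lineno", None)
        end = getattr(node, "end_lineno", None)
        if start is None or end is None:
            continue  # e.g. Module and other nodes without positions
        for line in range(start, end + 1):
            index.setdefault(line, []).append(node)
    return index


def nodes_in_range(index: dict[int, list[ast.AST]], start: int, end: int) -> set[ast.AST]:
    """Collect candidate nodes for a window without re-walking the whole tree."""
    found: set[ast.AST] = set()
    for line in range(start, end + 1):
        found.update(index.get(line, []))
    return found


source = "x = 1\nif x:\n    y = x + 1\n"
idx = build_index(ast.parse(source))
print(len(nodes_in_range(idx, 2, 3)))  # only nodes touching lines 2-3 are examined
```
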
--- thailint-0.4.3/src/linters/dry/token_hasher.py
+++ thailint-0.4.5/src/linters/dry/token_hasher.py
@@ -33,26 +33,80 @@ class TokenHasher:
         List of normalized code lines (non-empty, comments removed, imports filtered)
         """
         lines = []
+        in_multiline_import = False
 
         for line in code.split("\n"):
-            # Remove comments (language-specific logic can be added)
-            line = self._strip_comments(line)
-
-            # Normalize whitespace (collapse to single space)
-            line = " ".join(line.split())
-
-            # Skip empty lines
+            line = self._normalize_line(line)
             if not line:
                 continue
 
-            # Skip import statements (common false positive)
-            if self._is_import_statement(line):
+            # Update multi-line import state and check if line should be skipped
+            in_multiline_import, should_skip = self._should_skip_import_line(
+                line, in_multiline_import
+            )
+            if should_skip:
                 continue
 
             lines.append(line)
 
         return lines
 
+    def _normalize_line(self, line: str) -> str:
+        """Normalize a line by removing comments and excess whitespace.
+
+        Args:
+            line: Raw source code line
+
+        Returns:
+            Normalized line (empty string if line has no content)
+        """
+        line = self._strip_comments(line)
+        return " ".join(line.split())
+
+    def _should_skip_import_line(self, line: str, in_multiline_import: bool) -> tuple[bool, bool]:
+        """Determine if an import line should be skipped.
+
+        Args:
+            line: Normalized code line
+            in_multiline_import: Whether we're currently inside a multi-line import
+
+        Returns:
+            Tuple of (new_in_multiline_import_state, should_skip_line)
+        """
+        if self._is_multiline_import_start(line):
+            return True, True
+
+        if in_multiline_import:
+            return self._handle_multiline_import_continuation(line)
+
+        if self._is_import_statement(line):
+            return False, True
+
+        return False, False
+
+    def _is_multiline_import_start(self, line: str) -> bool:
+        """Check if line starts a multi-line import statement.
+
+        Args:
+            line: Normalized code line
+
+        Returns:
+            True if line starts a multi-line import (has opening paren but no closing)
+        """
+        return self._is_import_statement(line) and "(" in line and ")" not in line
+
+    def _handle_multiline_import_continuation(self, line: str) -> tuple[bool, bool]:
+        """Handle a line that's part of a multi-line import.
+
+        Args:
+            line: Normalized code line inside a multi-line import
+
+        Returns:
+            Tuple of (still_in_import, should_skip)
+        """
+        closes_import = ")" in line
+        return not closes_import, True
+
     def _strip_comments(self, line: str) -> str:
         """Remove comments from line (Python # and // style).
 
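
The two-state import filter above is easiest to see on sample input. The sketch below mirrors the `(new_state, should_skip)` contract of `_should_skip_import_line` with a simplified stand-in for `_is_import_statement`; it is illustrative, not the shipped code.

```python
def should_skip_import_line(line: str, in_import: bool) -> tuple[bool, bool]:
    """Return (new_in_import_state, skip_this_line); simplified stand-in."""
    is_import = line.startswith(("import ", "from "))
    if is_import and "(" in line and ")" not in line:
        return True, True  # opens a multi-line import: skip it, enter import state
    if in_import:
        return ")" not in line, True  # skip continuation lines until the paren closes
    return False, is_import  # single-line imports are skipped too


state = False
for line in ["from os import (", "path,", "sep,", ")", "x = 1"]:
    state, skip = should_skip_import_line(line, state)
    print(f"{line!r}: skip={skip}")
# Every import line is filtered; only 'x = 1' would reach the hash windows.
```
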
--- /dev/null
+++ thailint-0.4.5/src/linters/file_header/__init__.py
@@ -0,0 +1,24 @@
+"""
+File: src/linters/file_header/__init__.py
+Purpose: File header linter module initialization
+Exports: FileHeaderRule
+Depends: linter.FileHeaderRule
+Implements: Module-level exports for clean API
+Related: linter.py for main rule implementation
+
+Overview:
+    Initializes the file header linter module providing multi-language file header
+    validation with mandatory field checking, atemporal language detection, and configuration
+    support. Main entry point for file header linting functionality.
+
+Usage:
+    from src.linters.file_header import FileHeaderRule
+    rule = FileHeaderRule()
+    violations = rule.check(context)
+
+Notes: Follows standard Python module initialization pattern with __all__ export control
+"""
+
+from .linter import FileHeaderRule
+
+__all__ = ["FileHeaderRule"]
--- /dev/null
+++ thailint-0.4.5/src/linters/file_header/atemporal_detector.py
@@ -0,0 +1,87 @@
+"""
+File: src/linters/file_header/atemporal_detector.py
+Purpose: Detects temporal language patterns in file headers
+Exports: AtemporalDetector class
+Depends: re module for regex matching
+Implements: Regex-based pattern matching with configurable patterns
+Related: linter.py for detector usage, violation_builder.py for violation creation
+
+Overview:
+    Implements pattern-based detection of temporal language that violates atemporal
+    documentation requirements. Detects dates, temporal qualifiers, state change language,
+    and future references using regex patterns. Provides violation details for each pattern match.
+
+Usage:
+    detector = AtemporalDetector()
+    violations = detector.detect_violations(header_text)
+
+Notes: Four pattern categories - dates, temporal qualifiers, state changes, future references
+"""
+
+import re
+
+
+class AtemporalDetector:
+    """Detects temporal language patterns in text."""
+
+    # Date patterns
+    DATE_PATTERNS = [
+        (r"\d{4}-\d{2}-\d{2}", "ISO date format (YYYY-MM-DD)"),
+        (
+            r"(?:January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{4}",
+            "Month Year format",
+        ),
+        (r"(?:Created|Updated|Modified):\s*\d{4}", "Date metadata"),
+    ]
+
+    # Temporal qualifiers
+    TEMPORAL_QUALIFIERS = [
+        (r"\bcurrently\b", 'temporal qualifier "currently"'),
+        (r"\bnow\b", 'temporal qualifier "now"'),
+        (r"\brecently\b", 'temporal qualifier "recently"'),
+        (r"\bsoon\b", 'temporal qualifier "soon"'),
+        (r"\bfor now\b", 'temporal qualifier "for now"'),
+    ]
+
+    # State change language
+    STATE_CHANGE = [
+        (r"\breplaces?\b", 'state change "replaces"'),
+        (r"\bmigrated from\b", 'state change "migrated from"'),
+        (r"\bformerly\b", 'state change "formerly"'),
+        (r"\bold implementation\b", 'state change "old"'),
+        (r"\bnew implementation\b", 'state change "new"'),
+    ]
+
+    # Future references
+    FUTURE_REFS = [
+        (r"\bwill be\b", 'future reference "will be"'),
+        (r"\bplanned\b", 'future reference "planned"'),
+        (r"\bto be added\b", 'future reference "to be added"'),
+        (r"\bcoming soon\b", 'future reference "coming soon"'),
+    ]
+
+    def detect_violations(  # thailint: ignore[nesting]
+        self, text: str
+    ) -> list[tuple[str, str, int]]:
+        """Detect all temporal language violations in text.
+
+        Args:
+            text: Text to check for temporal language
+
+        Returns:
+            List of (pattern, description, line_number) tuples for each violation
+        """
+        violations = []
+
+        # Check all pattern categories
+        all_patterns = (
+            self.DATE_PATTERNS + self.TEMPORAL_QUALIFIERS + self.STATE_CHANGE + self.FUTURE_REFS
+        )
+
+        lines = text.split("\n")
+        for line_num, line in enumerate(lines, start=1):
+            for pattern, description in all_patterns:
+                if re.search(pattern, line, re.IGNORECASE):
+                    violations.append((pattern, description, line_num))
+
+        return violations
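
A quick illustrative run of the new detector, importing it by its module path since the package `__init__` only re-exports `FileHeaderRule`; the expected output follows the documented (pattern, description, line_number) tuples.

```python
from src.linters.file_header.atemporal_detector import AtemporalDetector

detector = AtemporalDetector()
header = "Purpose: Parse configs\nOverview: Currently replaces the old implementation"
for pattern, description, line_num in detector.detect_violations(header):
    print(f"line {line_num}: {description}")
# line 2: temporal qualifier "currently"
# line 2: state change "replaces"
# line 2: state change "old"
```
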
--- /dev/null
+++ thailint-0.4.5/src/linters/file_header/config.py
@@ -0,0 +1,66 @@
+"""
+File: src/linters/file_header/config.py
+Purpose: Configuration model for file header linter
+Exports: FileHeaderConfig dataclass
+Depends: dataclasses, pathlib
+Implements: Configuration with validation and defaults
+Related: linter.py for configuration usage
+
+Overview:
+    Defines configuration structure for file header linter including required fields
+    per language, ignore patterns, and validation options. Provides defaults matching
+    ai-doc-standard.md requirements and supports loading from .thailint.yaml configuration.
+
+Usage:
+    config = FileHeaderConfig()
+    config = FileHeaderConfig.from_dict(config_dict, "python")
+
+Notes: Dataclass with validation and language-specific defaults
+"""
+
+from dataclasses import dataclass, field
+
+
+@dataclass
+class FileHeaderConfig:
+    """Configuration for file header linting."""
+
+    # Required fields by language
+    required_fields_python: list[str] = field(
+        default_factory=lambda: [
+            "Purpose",
+            "Scope",
+            "Overview",
+            "Dependencies",
+            "Exports",
+            "Interfaces",
+            "Implementation",
+        ]
+    )
+
+    # Enforce atemporal language checking
+    enforce_atemporal: bool = True
+
+    # Patterns to ignore (file paths)
+    ignore: list[str] = field(
+        default_factory=lambda: ["test/**", "**/migrations/**", "**/__init__.py"]
+    )
+
+    @classmethod
+    def from_dict(cls, config_dict: dict, language: str) -> "FileHeaderConfig":
+        """Create config from dictionary.
+
+        Args:
+            config_dict: Dictionary of configuration values
+            language: Programming language for language-specific config
+
+        Returns:
+            FileHeaderConfig instance with values from dictionary
+        """
+        return cls(
+            required_fields_python=config_dict.get("required_fields", {}).get(
+                "python", cls().required_fields_python
+            ),
+            enforce_atemporal=config_dict.get("enforce_atemporal", True),
+            ignore=config_dict.get("ignore", cls().ignore),
+        )
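
A hedged sketch of round-tripping the config through `from_dict`: the dict shape below follows what `from_dict` reads, but the exact `.thailint.yaml` key layout for this linter is an assumption, not confirmed by this diff.

```python
from src.linters.file_header.config import FileHeaderConfig

raw = {
    "required_fields": {"python": ["Purpose", "Scope", "Overview"]},  # assumed YAML shape
    "enforce_atemporal": True,
    "ignore": ["test/**"],
}
config = FileHeaderConfig.from_dict(raw, "python")
print(config.required_fields_python)  # ['Purpose', 'Scope', 'Overview']
print(config.ignore)                  # ['test/**']
```
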
--- /dev/null
+++ thailint-0.4.5/src/linters/file_header/field_validator.py
@@ -0,0 +1,69 @@
+"""
+File: src/linters/file_header/field_validator.py
+Purpose: Validates mandatory fields in file headers
+Exports: FieldValidator class
+Depends: FileHeaderConfig for field requirements
+Implements: Configuration-driven validation with field presence checking
+Related: linter.py for validator usage, config.py for configuration
+
+Overview:
+    Validates presence and quality of mandatory header fields. Checks that all
+    required fields are present, non-empty, and meet minimum content requirements.
+    Supports language-specific required fields and provides detailed violation messages.
+
+Usage:
+    validator = FieldValidator(config)
+    violations = validator.validate_fields(fields, "python")
+
+Notes: Language-specific field requirements defined in config
+"""
+
+from .config import FileHeaderConfig
+
+
+class FieldValidator:
+    """Validates mandatory fields in headers."""
+
+    def __init__(self, config: FileHeaderConfig):
+        """Initialize validator with configuration.
+
+        Args:
+            config: File header configuration with required fields
+        """
+        self.config = config
+
+    def validate_fields(  # thailint: ignore[nesting]
+        self, fields: dict[str, str], language: str
+    ) -> list[tuple[str, str]]:
+        """Validate all required fields are present.
+
+        Args:
+            fields: Dictionary of parsed header fields
+            language: File language (python, typescript, etc.)
+
+        Returns:
+            List of (field_name, error_message) tuples for missing/invalid fields
+        """
+        violations = []
+        required_fields = self._get_required_fields(language)
+
+        for field_name in required_fields:
+            if field_name not in fields:
+                violations.append((field_name, f"Missing mandatory field: {field_name}"))
+            elif not fields[field_name] or len(fields[field_name].strip()) == 0:
+                violations.append((field_name, f"Empty mandatory field: {field_name}"))
+
+        return violations
+
+    def _get_required_fields(self, language: str) -> list[str]:
+        """Get required fields for language.
+
+        Args:
+            language: Programming language
+
+        Returns:
+            List of required field names for the language
+        """
+        if language == "python":
+            return self.config.required_fields_python
+        return []  # Other languages in PR5
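
And an illustrative run of the validator against a header missing most mandatory fields; header parsing itself lives in python_parser.py, which this excerpt does not show.

```python
from src.linters.file_header.config import FileHeaderConfig
from src.linters.file_header.field_validator import FieldValidator

validator = FieldValidator(FileHeaderConfig())
fields = {"Purpose": "Parse configs", "Scope": "", "Overview": "High-level notes"}
for name, message in validator.validate_fields(fields, "python"):
    print(message)
# Empty mandatory field: Scope
# Missing mandatory field: Dependencies
# Missing mandatory field: Exports
# Missing mandatory field: Interfaces
# Missing mandatory field: Implementation
```
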