thailint 0.4.6__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. src/cli.py +228 -1
  2. src/core/cli_utils.py +16 -1
  3. src/core/registry.py +1 -1
  4. src/formatters/__init__.py +22 -0
  5. src/formatters/sarif.py +202 -0
  6. src/linters/file_header/atemporal_detector.py +11 -11
  7. src/linters/file_header/base_parser.py +89 -0
  8. src/linters/file_header/bash_parser.py +58 -0
  9. src/linters/file_header/config.py +76 -16
  10. src/linters/file_header/css_parser.py +70 -0
  11. src/linters/file_header/field_validator.py +35 -29
  12. src/linters/file_header/linter.py +113 -121
  13. src/linters/file_header/markdown_parser.py +124 -0
  14. src/linters/file_header/python_parser.py +14 -58
  15. src/linters/file_header/typescript_parser.py +73 -0
  16. src/linters/file_header/violation_builder.py +13 -12
  17. src/linters/file_placement/linter.py +9 -11
  18. src/linters/magic_numbers/typescript_analyzer.py +1 -0
  19. src/linters/nesting/typescript_analyzer.py +1 -0
  20. src/linters/print_statements/__init__.py +53 -0
  21. src/linters/print_statements/config.py +78 -0
  22. src/linters/print_statements/linter.py +428 -0
  23. src/linters/print_statements/python_analyzer.py +149 -0
  24. src/linters/print_statements/typescript_analyzer.py +130 -0
  25. src/linters/print_statements/violation_builder.py +96 -0
  26. src/templates/thailint_config_template.yaml +26 -0
  27. {thailint-0.4.6.dist-info → thailint-0.7.0.dist-info}/METADATA +149 -3
  28. {thailint-0.4.6.dist-info → thailint-0.7.0.dist-info}/RECORD +31 -18
  29. {thailint-0.4.6.dist-info → thailint-0.7.0.dist-info}/WHEEL +0 -0
  30. {thailint-0.4.6.dist-info → thailint-0.7.0.dist-info}/entry_points.txt +0 -0
  31. {thailint-0.4.6.dist-info → thailint-0.7.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,124 @@
+ """
+ Purpose: Markdown YAML frontmatter extraction and parsing
+
+ Scope: Markdown file header parsing from YAML frontmatter
+
+ Overview: Extracts YAML frontmatter from Markdown files. Frontmatter must be at the
+     start of the file, enclosed in --- markers. Parses YAML content to extract
+     field values using PyYAML when available, falling back to regex parsing if not.
+     Handles both simple key-value pairs and complex YAML structures including lists.
+     Flattens nested structures into string representations for field validation.
+
+ Dependencies: re module for frontmatter pattern matching, yaml module (optional) for parsing, logging module
+
+ Exports: MarkdownHeaderParser class
+
+ Interfaces: extract_header(code) -> str | None for frontmatter extraction,
+     parse_fields(header) -> dict[str, str] for field parsing
+
+ Implementation: YAML frontmatter extraction with PyYAML parsing and regex fallback for robustness
+ """
+
+ import logging
+ import re
+
+ logger = logging.getLogger(__name__)
+
+
+ class MarkdownHeaderParser: # thailint: ignore[srp]
+     """Extracts and parses Markdown file headers from YAML frontmatter.
+
+     Method count (10) exceeds SRP guideline (8) because proper A-grade complexity
+     refactoring requires extracting small focused helper methods. Class maintains
+     single responsibility of YAML frontmatter parsing - all methods support this
+     core purpose through either PyYAML or simple regex parsing fallback.
+     """
+
+     # Pattern to match YAML frontmatter at start of file
+     FRONTMATTER_PATTERN = re.compile(r"^---\s*\n(.*?)\n---", re.DOTALL)
+
+     def extract_header(self, code: str) -> str | None:
+         """Extract YAML frontmatter from Markdown file."""
+         if not code or not code.strip():
+             return None
+
+         match = self.FRONTMATTER_PATTERN.match(code)
+         return match.group(1).strip() if match else None
+
+     def parse_fields(self, header: str) -> dict[str, str]:
+         """Parse YAML frontmatter into field dictionary."""
+         yaml_result = self._try_yaml_parse(header)
+         if yaml_result is not None:
+             return yaml_result
+
+         return self._parse_simple_yaml(header)
+
+     def _try_yaml_parse(self, header: str) -> dict[str, str] | None:
+         """Try to parse with PyYAML, returning None if unavailable or failed."""
+         try:
+             import yaml
+
+             data = yaml.safe_load(header)
+             if isinstance(data, dict):
+                 return self._flatten_yaml_dict(data)
+         except ImportError:
+             logger.debug("PyYAML not available, using simple parser")
+         except Exception: # noqa: BLE001
+             logger.debug("YAML parsing failed, falling back to simple parser")
+         return None
+
+     def _flatten_yaml_dict(self, data: dict) -> dict[str, str]:
+         """Convert YAML dict to string values."""
+         result: dict[str, str] = {}
+         for key, value in data.items():
+             result[str(key)] = self._convert_value(value)
+         return result
+
+     def _convert_value(self, value: object) -> str:
+         """Convert a single YAML value to string."""
+         if isinstance(value, list):
+             return ", ".join(str(v) for v in value)
+         if value is not None:
+             return str(value)
+         return ""
+
+     def _parse_simple_yaml( # thailint: ignore[nesting,dry]
+         self, header: str
+     ) -> dict[str, str]:
+         """Simple regex-based YAML parsing fallback."""
+         fields: dict[str, str] = {}
+         current_field: str | None = None
+         current_value: list[str] = []
+
+         for line in header.split("\n"):
+             if self._is_field_start(line):
+                 self._save_field(fields, current_field, current_value)
+                 current_field, current_value = self._start_field(line)
+             elif current_field and line.strip():
+                 current_value.append(self._process_continuation(line))
+
+         self._save_field(fields, current_field, current_value)
+         return fields
+
+     def _is_field_start(self, line: str) -> bool:
+         """Check if line starts a new field (not indented, has colon)."""
+         return not line.startswith(" ") and ":" in line
+
+     def _start_field(self, line: str) -> tuple[str, list[str]]:
+         """Parse field start and return field name and initial value."""
+         parts = line.split(":", 1)
+         field_name = parts[0].strip()
+         value = parts[1].strip() if len(parts) > 1 else ""
+         return field_name, [value] if value else []
+
+     def _process_continuation(self, line: str) -> str:
+         """Process a continuation line (list item or multiline value)."""
+         stripped = line.strip()
+         return stripped[2:] if stripped.startswith("- ") else stripped
+
+     def _save_field(
+         self, fields: dict[str, str], field_name: str | None, values: list[str]
+     ) -> None:
+         """Save field to dictionary if field name exists."""
+         if field_name:
+             fields[field_name] = "\n".join(values).strip()
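A minimal usage sketch of the MarkdownHeaderParser added above; the sample document and the expected field values are illustrative assumptions, not output taken from the package's tests.

from src.linters.file_header.markdown_parser import MarkdownHeaderParser

doc = """---
Purpose: Example document
Tags:
  - linting
  - headers
---
# Body text
"""

parser = MarkdownHeaderParser()
header = parser.extract_header(doc)   # YAML between the --- markers, or None
fields = parser.parse_fields(header)  # {"Purpose": "Example document", "Tags": "linting, headers"}
# With PyYAML installed the Tags list is flattened to "linting, headers";
# the regex fallback joins continuation lines with newlines instead.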
@@ -1,29 +1,29 @@
  """
- File: src/linters/file_header/python_parser.py
  Purpose: Python docstring extraction and parsing for file headers
- Exports: PythonHeaderParser class
- Depends: Python ast module
- Implements: AST-based docstring extraction with field parsing
- Related: linter.py for parser usage, field_validator.py for field validation

- Overview:
-     Extracts module-level docstrings from Python files using AST parsing.
+ Scope: Python file header parsing from module-level docstrings
+
+ Overview: Extracts module-level docstrings from Python files using AST parsing.
      Parses structured header fields from docstring content and handles both
      well-formed and malformed headers. Provides field extraction and validation
-     support for FileHeaderRule.
+     support for FileHeaderRule. Uses ast.get_docstring() for reliable extraction
+     and gracefully handles syntax errors in source code.
+
+ Dependencies: Python ast module for AST parsing, base_parser.BaseHeaderParser for field parsing
+
+ Exports: PythonHeaderParser class

- Usage:
-     parser = PythonHeaderParser()
-     header = parser.extract_header(code)
-     fields = parser.parse_fields(header)
+ Interfaces: extract_header(code) -> str | None for docstring extraction, parse_fields(header) inherited from base

- Notes: Uses ast.get_docstring() for reliable module-level docstring extraction
+ Implementation: AST-based docstring extraction with syntax error handling
  """

  import ast

+ from src.linters.file_header.base_parser import BaseHeaderParser

- class PythonHeaderParser:
+
+ class PythonHeaderParser(BaseHeaderParser):
      """Extracts and parses Python file headers from docstrings."""

      def extract_header(self, code: str) -> str | None:
@@ -40,47 +40,3 @@ class PythonHeaderParser:
              return ast.get_docstring(tree)
          except SyntaxError:
              return None
-
-     def parse_fields(self, header: str) -> dict[str, str]: # thailint: ignore[nesting]
-         """Parse structured fields from header text.
-
-         Args:
-             header: Header docstring text
-
-         Returns:
-             Dictionary mapping field_name -> field_value
-         """
-         fields: dict[str, str] = {}
-         current_field: str | None = None
-         current_value: list[str] = []
-
-         for line in header.split("\n"):
-             if self._is_new_field_line(line):
-                 current_field = self._save_and_start_new_field(
-                     fields, current_field, current_value, line
-                 )
-                 current_value = [line.split(":", 1)[1].strip()]
-             elif current_field:
-                 current_value.append(line.strip())
-
-         self._save_current_field(fields, current_field, current_value)
-         return fields
-
-     def _is_new_field_line(self, line: str) -> bool:
-         """Check if line starts a new field."""
-         return ":" in line and not line.startswith(" ")
-
-     def _save_and_start_new_field(
-         self, fields: dict[str, str], current_field: str | None, current_value: list[str], line: str
-     ) -> str:
-         """Save current field and start new one."""
-         if current_field:
-             fields[current_field] = "\n".join(current_value).strip()
-         return line.split(":", 1)[0].strip()
-
-     def _save_current_field(
-         self, fields: dict[str, str], current_field: str | None, current_value: list[str]
-     ) -> None:
-         """Save the last field."""
-         if current_field:
-             fields[current_field] = "\n".join(current_value).strip()
@@ -0,0 +1,73 @@
+ """
+ Purpose: TypeScript/JavaScript JSDoc comment extraction and parsing
+
+ Scope: TypeScript and JavaScript file header parsing from JSDoc comments
+
+ Overview: Extracts JSDoc-style comments (/** ... */) from TypeScript and JavaScript files.
+     Parses structured header fields from JSDoc content and handles both single-line
+     and multi-line field values. Distinguishes JSDoc comments from regular block
+     comments (/* ... */) by requiring the double asterisk syntax. Cleans formatting
+     characters including leading asterisks from content lines.
+
+ Dependencies: re module for regex-based JSDoc pattern matching, base_parser.BaseHeaderParser for field parsing
+
+ Exports: TypeScriptHeaderParser class
+
+ Interfaces: extract_header(code) -> str | None for JSDoc extraction, parse_fields(header) inherited from base
+
+ Implementation: Regex-based JSDoc extraction with content cleaning and formatting removal
+ """
+
+ import re
+
+ from src.linters.file_header.base_parser import BaseHeaderParser
+
+
+ class TypeScriptHeaderParser(BaseHeaderParser):
+     """Extracts and parses TypeScript/JavaScript file headers from JSDoc comments."""
+
+     # Pattern to match JSDoc comment at start of file (allowing whitespace before)
+     JSDOC_PATTERN = re.compile(r"^\s*/\*\*\s*(.*?)\s*\*/", re.DOTALL)
+
+     def extract_header(self, code: str) -> str | None:
+         """Extract JSDoc comment from TypeScript/JavaScript code.
+
+         Args:
+             code: TypeScript/JavaScript source code
+
+         Returns:
+             JSDoc content or None if not found
+         """
+         if not code or not code.strip():
+             return None
+
+         match = self.JSDOC_PATTERN.match(code)
+         if not match:
+             return None
+
+         # Extract the content inside the JSDoc
+         jsdoc_content = match.group(1)
+
+         # Clean up the JSDoc content - remove leading * from each line
+         return self._clean_jsdoc_content(jsdoc_content)
+
+     def _clean_jsdoc_content(self, content: str) -> str:
+         """Remove JSDoc formatting (leading asterisks) from content.
+
+         Args:
+             content: Raw JSDoc content
+
+         Returns:
+             Cleaned content without leading asterisks
+         """
+         lines = content.split("\n")
+         cleaned_lines = []
+
+         for line in lines:
+             # Remove leading whitespace and asterisk
+             stripped = line.strip()
+             if stripped.startswith("*"):
+                 stripped = stripped[1:].strip()
+             cleaned_lines.append(stripped)
+
+         return "\n".join(cleaned_lines)
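A minimal usage sketch of the TypeScriptHeaderParser added above, assuming parse_fields is supplied by BaseHeaderParser (added in base_parser.py, not shown in this hunk); the sample source is illustrative.

from src.linters.file_header.typescript_parser import TypeScriptHeaderParser

ts_source = """/**
 * Purpose: Example TypeScript module
 * Scope: Demonstration only
 */
export const answer = 42;
"""

parser = TypeScriptHeaderParser()
header = parser.extract_header(ts_source)
# Leading "*" markers are stripped, leaving:
# "Purpose: Example TypeScript module\nScope: Demonstration only"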
@@ -1,21 +1,22 @@
  """
- File: src/linters/file_header/violation_builder.py
  Purpose: Builds violation messages for file header linter
- Exports: ViolationBuilder class
- Depends: Violation type from core
- Implements: Message templates with context-specific details
- Related: linter.py for builder usage, atemporal_detector.py for temporal violations

- Overview:
-     Creates formatted violation messages for file header validation failures.
+ Scope: Violation message creation for file header validation failures
+
+ Overview: Creates formatted violation messages for file header validation failures.
      Handles missing fields, atemporal language, and other header issues with clear,
-     actionable messages. Provides consistent violation format across all validation types.
+     actionable messages. Provides consistent violation format across all validation types
+     including rule_id, message, location, severity, and helpful suggestions. Supports
+     multiple violation types with appropriate error messages and remediation guidance.
+
+ Dependencies: Violation and Severity types from core.types module
+
+ Exports: ViolationBuilder class

- Usage:
-     builder = ViolationBuilder("file-header.validation")
-     violation = builder.build_missing_field("Purpose", "test.py", 1)
+ Interfaces: build_missing_field(field_name, file_path, line) -> Violation,
+     build_atemporal_violation(pattern, description, file_path, line) -> Violation

- Notes: Follows standard violation format with rule_id, message, location, severity, suggestion
+ Implementation: Builder pattern with message templates for different violation types
  """

  from src.core.types import Severity, Violation
@@ -6,22 +6,20 @@ Scope: Validate file organization against allow/deny patterns
  Overview: Implements file placement validation using regex patterns from JSON/YAML config.
      Orchestrates configuration loading, pattern validation, path resolution, rule checking,
      and violation creation through focused helper classes. Supports directory-specific rules,
-     global patterns, and generates helpful suggestions. Main linter class acts as coordinator.
+     global patterns, and generates helpful suggestions. Main linter class acts as coordinator
+     using composition pattern with specialized helper classes for configuration loading,
+     path resolution, pattern matching, and violation creation.

- Dependencies: src.core (base classes, types), pathlib, typing
+ Dependencies: src.core (base classes, types), pathlib, typing, json, yaml modules

  Exports: FilePlacementLinter, FilePlacementRule

- Implementation: Composition pattern with helper classes for each responsibility
+ Interfaces: lint_path(file_path) -> list[Violation], check_file_allowed(file_path) -> bool,
+     lint_directory(dir_path) -> list[Violation]

- SRP Exception: FilePlacementRule has 13 methods (exceeds max 8)
- Justification: Framework adapter class that bridges BaseLintRule interface with
-     FilePlacementLinter implementation. Must handle multiple config sources (metadata vs file),
-     multiple config formats (wrapped vs unwrapped), project root detection with fallbacks,
-     and linter caching. This complexity is inherent to adapter pattern - splitting would
-     create unnecessary indirection between framework and implementation without improving
-     maintainability. All methods are focused on the single responsibility of integrating
-     file placement validation with the linting framework.
+ Implementation: Composition pattern with helper classes for each responsibility
+     (ConfigLoader, PathResolver, PatternMatcher, PatternValidator, RuleChecker,
+     ViolationFactory)
  """

  import json
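The Interfaces line above names the linter's entry points; the sketch below shows how they might be called. The no-argument constructor and the example paths are assumptions, since this diff does not show FilePlacementLinter's signature or configuration loading.

from src.linters.file_placement.linter import FilePlacementLinter

linter = FilePlacementLinter()  # constructor arguments assumed; config discovery not shown in this diff

violations = linter.lint_path("src/cli.py")        # -> list[Violation]
allowed = linter.check_file_allowed("src/cli.py")  # -> bool
more = linter.lint_directory("src/")               # -> list[Violation]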
@@ -26,6 +26,7 @@ from typing import Any

  from src.analyzers.typescript_base import TypeScriptBaseAnalyzer

+ # dry: ignore-block - tree-sitter import pattern (common across TypeScript analyzers)
  try:
      from tree_sitter import Node

@@ -22,6 +22,7 @@ from typing import Any

  from src.analyzers.typescript_base import TypeScriptBaseAnalyzer

+ # dry: ignore-block - tree-sitter import pattern (common across TypeScript analyzers)
  try:
      from tree_sitter import Node

@@ -0,0 +1,53 @@
+ """
+ File: src/linters/print_statements/__init__.py
+
+ Purpose: Print statements linter package exports and convenience functions
+
+ Exports: PrintStatementRule class, PrintStatementConfig dataclass, lint() convenience function
+
+ Depends: .linter for PrintStatementRule, .config for PrintStatementConfig
+
+ Implements: lint(file_path, config) -> list[Violation] for simple linting operations
+
+ Related: src/linters/magic_numbers/__init__.py, src/core/base.py
+
+ Overview: Provides the public interface for the print statements linter package. Exports main
+     PrintStatementRule class for use by the orchestrator and PrintStatementConfig for configuration.
+     Includes lint() convenience function that provides a simple API for running the print statements
+     linter on a file without directly interacting with the orchestrator. This module serves as the
+     entry point for users of the print statements linter, hiding implementation details and exposing
+     only the essential components needed for linting operations.
+
+ Usage: from src.linters.print_statements import PrintStatementRule, lint
+     violations = lint("path/to/file.py")
+
+ Notes: Module-level exports with __all__ definition, convenience function wrapper
+ """
+
+ from .config import PrintStatementConfig
+ from .linter import PrintStatementRule
+
+ __all__ = ["PrintStatementRule", "PrintStatementConfig", "lint"]
+
+
+ def lint(file_path: str, config: dict | None = None) -> list:
+     """Convenience function for linting a file for print statements.
+
+     Args:
+         file_path: Path to the file to lint
+         config: Optional configuration dictionary
+
+     Returns:
+         List of violations found
+     """
+     from pathlib import Path
+
+     from src.orchestrator.core import FileLintContext
+
+     rule = PrintStatementRule()
+     context = FileLintContext(
+         path=Path(file_path),
+         lang="python",
+     )
+
+     return rule.check(context)
@@ -0,0 +1,78 @@
+ """
+ Purpose: Configuration schema for print statements linter
+
+ Scope: Print statements linter configuration for all supported languages
+
+ Overview: Defines configuration schema for print statements linter. Provides PrintStatementConfig
+     dataclass with enabled flag, ignore patterns list, allow_in_scripts setting (default True to
+     allow print in __main__ blocks), and console_methods set (default includes log, warn, error,
+     debug, info) for TypeScript/JavaScript console method detection. Supports per-file and
+     per-directory config overrides through from_dict class method. Integrates with orchestrator's
+     configuration system to allow users to customize detection via .thailint.yaml configuration.
+
+ Dependencies: dataclasses module for configuration structure, typing module for type hints
+
+ Exports: PrintStatementConfig dataclass
+
+ Interfaces: from_dict(config, language) -> PrintStatementConfig for configuration loading from dictionary
+
+ Implementation: Dataclass with defaults matching common use cases and language-specific override support
+ """
+
+ from dataclasses import dataclass, field
+ from typing import Any
+
+
+ @dataclass
+ class PrintStatementConfig:
+     """Configuration for print statements linter."""
+
+     enabled: bool = True
+     ignore: list[str] = field(default_factory=list)
+     allow_in_scripts: bool = True
+     console_methods: set[str] = field(
+         default_factory=lambda: {"log", "warn", "error", "debug", "info"}
+     )
+
+     @classmethod
+     def from_dict(
+         cls, config: dict[str, Any], language: str | None = None
+     ) -> "PrintStatementConfig":
+         """Load configuration from dictionary with language-specific overrides.
+
+         Args:
+             config: Dictionary containing configuration values
+             language: Programming language (python, typescript, javascript)
+                 for language-specific settings
+
+         Returns:
+             PrintStatementConfig instance with values from dictionary
+         """
+         # Get language-specific config if available
+         if language and language in config:
+             lang_config = config[language]
+             allow_in_scripts = lang_config.get(
+                 "allow_in_scripts", config.get("allow_in_scripts", True)
+             )
+             console_methods = set(
+                 lang_config.get(
+                     "console_methods",
+                     config.get("console_methods", ["log", "warn", "error", "debug", "info"]),
+                 )
+             )
+         else:
+             allow_in_scripts = config.get("allow_in_scripts", True)
+             console_methods = set(
+                 config.get("console_methods", ["log", "warn", "error", "debug", "info"])
+             )
+
+         ignore_patterns = config.get("ignore", [])
+         if not isinstance(ignore_patterns, list):
+             ignore_patterns = []
+
+         return cls(
+             enabled=config.get("enabled", True),
+             ignore=ignore_patterns,
+             allow_in_scripts=allow_in_scripts,
+             console_methods=console_methods,
+         )
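A short sketch of PrintStatementConfig.from_dict with a language-specific override, based on the defaults defined above; the per-language sub-mapping mirrors the from_dict lookup, and the raw dictionary shape is otherwise an assumption about how a .thailint.yaml section would decode.

from src.linters.print_statements.config import PrintStatementConfig

raw = {
    "enabled": True,
    "allow_in_scripts": False,
    "typescript": {"console_methods": ["log", "error"]},
}

py_cfg = PrintStatementConfig.from_dict(raw, language="python")
ts_cfg = PrintStatementConfig.from_dict(raw, language="typescript")

assert py_cfg.allow_in_scripts is False                                     # top-level setting applies
assert py_cfg.console_methods == {"log", "warn", "error", "debug", "info"}  # defaults retained
assert ts_cfg.console_methods == {"log", "error"}                           # per-language override wins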