thailint 0.5.0-py3-none-any.whl → 0.7.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
src/cli.py CHANGED
@@ -38,7 +38,11 @@ logger = logging.getLogger(__name__)
38
38
  def format_option(func):
39
39
  """Add --format option to a command for output format selection."""
40
40
  return click.option(
41
- "--format", "-f", type=click.Choice(["text", "json"]), default="text", help="Output format"
41
+ "--format",
42
+ "-f",
43
+ type=click.Choice(["text", "json", "sarif"]),
44
+ default="text",
45
+ help="Output format",
42
46
  )(func)
43
47
 
44
48
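
The only functional change in this hunk is the wider Choice list: every command decorated with format_option now accepts sarif alongside text and json. A minimal sketch of how a decorated command receives the value (the demo command is illustrative, not part of the package):

    import click

    from src.cli import format_option  # assumes the decorator can be imported directly

    @click.command("header-demo")  # hypothetical command, for illustration only
    @format_option
    def header_demo(format: str):
        """Echo the selected output format."""
        # click passes the chosen value: "text", "json", or "sarif"
        click.echo(f"selected output format: {format}")
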
 
@@ -1661,5 +1665,118 @@ def _execute_print_statements_lint( # pylint: disable=too-many-arguments,too-ma
1661
1665
  sys.exit(1 if print_statements_violations else 0)
1662
1666
 
1663
1667
 
1668
+ # File Header Command Helper Functions
1669
+
1670
+
1671
+ def _setup_file_header_orchestrator(
1672
+ path_objs: list[Path], config_file: str | None, verbose: bool, project_root: Path | None = None
1673
+ ):
1674
+ """Set up orchestrator for file-header command."""
1675
+ from src.orchestrator.core import Orchestrator
1676
+ from src.utils.project_root import get_project_root
1677
+
1678
+ # Use provided project_root or fall back to auto-detection
1679
+ if project_root is None:
1680
+ first_path = path_objs[0] if path_objs else Path.cwd()
1681
+ search_start = first_path if first_path.is_dir() else first_path.parent
1682
+ project_root = get_project_root(search_start)
1683
+
1684
+ orchestrator = Orchestrator(project_root=project_root)
1685
+
1686
+ if config_file:
1687
+ _load_config_file(orchestrator, config_file, verbose)
1688
+
1689
+ return orchestrator
1690
+
1691
+
1692
+ def _run_file_header_lint(orchestrator, path_objs: list[Path], recursive: bool):
1693
+ """Execute file-header lint on files or directories."""
1694
+ all_violations = _execute_linting_on_paths(orchestrator, path_objs, recursive)
1695
+ return [v for v in all_violations if "file-header" in v.rule_id]
1696
+
1697
+
1698
+ @cli.command("file-header")
1699
+ @click.argument("paths", nargs=-1, type=click.Path())
1700
+ @click.option("--config", "-c", "config_file", type=click.Path(), help="Path to config file")
1701
+ @format_option
1702
+ @click.option("--recursive/--no-recursive", default=True, help="Scan directories recursively")
1703
+ @click.pass_context
1704
+ def file_header(
1705
+ ctx,
1706
+ paths: tuple[str, ...],
1707
+ config_file: str | None,
1708
+ format: str,
1709
+ recursive: bool,
1710
+ ):
1711
+ """Check file headers for mandatory fields and atemporal language.
1712
+
1713
+ Validates that source files have proper documentation headers containing
1714
+ required fields (Purpose, Scope, Overview, etc.) and don't use temporal
1715
+ language (dates, "currently", "now", etc.).
1716
+
1717
+ Supports Python, TypeScript, JavaScript, Bash, Markdown, and CSS files.
1718
+
1719
+ PATHS: Files or directories to lint (defaults to current directory if none provided)
1720
+
1721
+ Examples:
1722
+
1723
+ \b
1724
+ # Check current directory (all files recursively)
1725
+ thai-lint file-header
1726
+
1727
+ \b
1728
+ # Check specific directory
1729
+ thai-lint file-header src/
1730
+
1731
+ \b
1732
+ # Check single file
1733
+ thai-lint file-header src/cli.py
1734
+
1735
+ \b
1736
+ # Check multiple files
1737
+ thai-lint file-header src/cli.py src/api.py tests/
1738
+
1739
+ \b
1740
+ # Get JSON output
1741
+ thai-lint file-header --format json .
1742
+
1743
+ \b
1744
+ # Get SARIF output for CI/CD integration
1745
+ thai-lint file-header --format sarif src/
1746
+
1747
+ \b
1748
+ # Use custom config file
1749
+ thai-lint file-header --config .thailint.yaml src/
1750
+ """
1751
+ verbose = ctx.obj.get("verbose", False)
1752
+ project_root = _get_project_root_from_context(ctx)
1753
+
1754
+ # Default to current directory if no paths provided
1755
+ if not paths:
1756
+ paths = (".",)
1757
+
1758
+ path_objs = [Path(p) for p in paths]
1759
+
1760
+ try:
1761
+ _execute_file_header_lint(path_objs, config_file, format, recursive, verbose, project_root)
1762
+ except Exception as e:
1763
+ _handle_linting_error(e, verbose)
1764
+
1765
+
1766
+ def _execute_file_header_lint( # pylint: disable=too-many-arguments,too-many-positional-arguments
1767
+ path_objs, config_file, format, recursive, verbose, project_root=None
1768
+ ):
1769
+ """Execute file-header lint."""
1770
+ _validate_paths_exist(path_objs)
1771
+ orchestrator = _setup_file_header_orchestrator(path_objs, config_file, verbose, project_root)
1772
+ file_header_violations = _run_file_header_lint(orchestrator, path_objs, recursive)
1773
+
1774
+ if verbose:
1775
+ logger.info(f"Found {len(file_header_violations)} file header violation(s)")
1776
+
1777
+ format_violations(file_header_violations, format)
1778
+ sys.exit(1 if file_header_violations else 0)
1779
+
1780
+
1664
1781
  if __name__ == "__main__":
1665
1782
  cli()
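
A hedged way to exercise the new command without installing the console script is click's test runner; this sketch assumes src.cli.cli imports cleanly in the current environment:

    from click.testing import CliRunner

    from src.cli import cli  # the click group defined in src/cli.py

    runner = CliRunner()
    # Equivalent to: thai-lint file-header --format sarif src/
    result = runner.invoke(cli, ["file-header", "--format", "sarif", "src/"])

    print(result.exit_code)  # 1 if file-header violations were found, 0 otherwise
    print(result.output)     # SARIF v2.1.0 JSON document on stdout
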
src/core/cli_utils.py CHANGED
@@ -146,10 +146,12 @@ def format_violations(violations: list, output_format: str) -> None:
146
146
 
147
147
  Args:
148
148
  violations: List of violation objects with rule_id, file_path, line, column, message, severity
149
- output_format: Output format ("text" or "json")
149
+ output_format: Output format ("text", "json", or "sarif")
150
150
  """
151
151
  if output_format == "json":
152
152
  _output_json(violations)
153
+ elif output_format == "sarif":
154
+ _output_sarif(violations)
153
155
  else:
154
156
  _output_text(violations)
155
157
 
@@ -177,6 +179,19 @@ def _output_json(violations: list) -> None:
177
179
  click.echo(json.dumps(output, indent=2))
178
180
 
179
181
 
182
+ def _output_sarif(violations: list) -> None:
183
+ """Output violations in SARIF v2.1.0 format.
184
+
185
+ Args:
186
+ violations: List of violation objects
187
+ """
188
+ from src.formatters.sarif import SarifFormatter
189
+
190
+ formatter = SarifFormatter()
191
+ sarif_doc = formatter.format(violations)
192
+ click.echo(json.dumps(sarif_doc, indent=2))
193
+
194
+
180
195
  def _output_text(violations: list) -> None:
181
196
  """Output violations in human-readable text format.
182
197
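
The dispatcher gains one branch; text remains the default for anything else. A small sketch of the three paths, assuming src.core.cli_utils imports outside the CLI (the comments describe expected shape, not verified output):

    from src.core.cli_utils import format_violations

    # Each branch prints a complete document even with an empty violation list.
    format_violations([], "text")   # human-readable summary
    format_violations([], "json")   # JSON output
    format_violations([], "sarif")  # SARIF 2.1.0 envelope with empty results
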
 
src/core/registry.py CHANGED
@@ -6,7 +6,7 @@ Scope: Dynamic rule management and discovery across all linter plugin packages
6
6
  Overview: Implements rule registry that maintains a collection of registered linting rules indexed
7
7
  by rule_id. Provides methods to register individual rules, retrieve rules by identifier, list
8
8
  all available rules, and discover rules from packages using the RuleDiscovery helper. Enables
9
- the extensible plugin architecture by allowing rules to be added dynamically without framework
9
+ the extensible plugin architecture by allowing dynamic rule registration without framework
10
10
  modifications. Validates rule uniqueness and handles registration errors gracefully.
11
11
 
12
12
  Dependencies: BaseLintRule, RuleDiscovery
src/formatters/__init__.py ADDED
@@ -0,0 +1,22 @@
1
+ """
2
+ Purpose: SARIF formatter package for thai-lint output
3
+
4
+ Scope: SARIF v2.1.0 formatter implementation and package exports
5
+
6
+ Overview: Formatters package providing SARIF (Static Analysis Results Interchange Format) v2.1.0
7
+ output generation from thai-lint Violation objects. Enables integration with GitHub Code
8
+ Scanning, Azure DevOps, VS Code SARIF Viewer, and other industry-standard CI/CD platforms.
9
+ Provides the SarifFormatter class for converting violations to SARIF JSON documents.
10
+
11
+ Dependencies: sarif module for SarifFormatter class
12
+
13
+ Exports: SarifFormatter class from sarif.py module
14
+
15
+ Interfaces: from src.formatters.sarif import SarifFormatter
16
+
17
+ Implementation: Package initialization with SarifFormatter export
18
+ """
19
+
20
+ from src.formatters.sarif import SarifFormatter
21
+
22
+ __all__ = ["SarifFormatter"]
src/formatters/sarif.py ADDED
@@ -0,0 +1,202 @@
1
+ """
2
+ Purpose: SARIF v2.1.0 formatter for converting Violation objects to SARIF JSON documents
3
+
4
+ Scope: SARIF document generation, tool metadata, result conversion, location mapping
5
+
6
+ Overview: Implements SarifFormatter class that converts thai-lint Violation objects to SARIF
7
+ (Static Analysis Results Interchange Format) v2.1.0 compliant JSON documents. Produces
8
+ output compatible with GitHub Code Scanning, Azure DevOps, VS Code SARIF Viewer, and
9
+ other industry-standard static analysis tools. Handles proper field mapping including
10
+ 1-indexed column conversion, rule metadata deduplication, and tool versioning.
11
+
12
+ Dependencies: src (for __version__), src.core.types (Violation, Severity)
13
+
14
+ Exports: SarifFormatter class with format() method
15
+
16
+ Interfaces: SarifFormatter.format(violations: list[Violation]) -> dict
17
+
18
+ Implementation: Converts Violation objects to SARIF structure with proper indexing and metadata
19
+ """
20
+
21
+ from typing import Any
22
+
23
+ from src import __version__
24
+ from src.core.types import Violation
25
+
26
+
27
+ class SarifFormatter:
28
+ """Formats Violation objects as SARIF v2.1.0 JSON documents.
29
+
30
+ SARIF (Static Analysis Results Interchange Format) is the OASIS standard
31
+ for static analysis tool output, enabling integration with GitHub Code
32
+ Scanning, Azure DevOps, and other CI/CD platforms.
33
+
34
+ Attributes:
35
+ tool_name: Name of the tool in SARIF output (default: "thai-lint")
36
+ tool_version: Version string for the tool (default: package version)
37
+ information_uri: URL for tool documentation
38
+ """
39
+
40
+ SARIF_VERSION = "2.1.0"
41
+ SARIF_SCHEMA = (
42
+ "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/"
43
+ "main/sarif-2.1/schema/sarif-schema-2.1.0.json"
44
+ )
45
+ DEFAULT_INFORMATION_URI = "https://github.com/be-wise-be-kind/thai-lint"
46
+
47
+ def __init__(
48
+ self,
49
+ tool_name: str = "thai-lint",
50
+ tool_version: str | None = None,
51
+ information_uri: str | None = None,
52
+ ) -> None:
53
+ """Initialize SarifFormatter with tool metadata.
54
+
55
+ Args:
56
+ tool_name: Name of the tool (default: "thai-lint")
57
+ tool_version: Version string (default: package __version__)
58
+ information_uri: URL for tool documentation
59
+ """
60
+ self.tool_name = tool_name
61
+ self.tool_version = tool_version or __version__
62
+ self.information_uri = information_uri or self.DEFAULT_INFORMATION_URI
63
+
64
+ def format(self, violations: list[Violation]) -> dict[str, Any]:
65
+ """Convert violations to SARIF v2.1.0 document.
66
+
67
+ Args:
68
+ violations: List of Violation objects to format
69
+
70
+ Returns:
71
+ SARIF v2.1.0 compliant dictionary ready for JSON serialization
72
+ """
73
+ return {
74
+ "version": self.SARIF_VERSION,
75
+ "$schema": self.SARIF_SCHEMA,
76
+ "runs": [self._create_run(violations)],
77
+ }
78
+
79
+ def _create_run(self, violations: list[Violation]) -> dict[str, Any]:
80
+ """Create a SARIF run object containing tool and results.
81
+
82
+ Args:
83
+ violations: List of violations for this run
84
+
85
+ Returns:
86
+ SARIF run object with tool and results
87
+ """
88
+ return {
89
+ "tool": self._create_tool(violations),
90
+ "results": [self._create_result(v) for v in violations],
91
+ }
92
+
93
+ def _create_tool(self, violations: list[Violation]) -> dict[str, Any]:
94
+ """Create SARIF tool object with driver metadata.
95
+
96
+ Args:
97
+ violations: List of violations to extract rule metadata from
98
+
99
+ Returns:
100
+ SARIF tool object with driver
101
+ """
102
+ return {
103
+ "driver": {
104
+ "name": self.tool_name,
105
+ "version": self.tool_version,
106
+ "informationUri": self.information_uri,
107
+ "rules": self._create_rules(violations),
108
+ }
109
+ }
110
+
111
+ def _create_rules(self, violations: list[Violation]) -> list[dict[str, Any]]:
112
+ """Create deduplicated SARIF rules array from violations.
113
+
114
+ Args:
115
+ violations: List of violations to extract unique rules from
116
+
117
+ Returns:
118
+ List of SARIF rule objects with unique IDs
119
+ """
120
+ seen_rule_ids: set[str] = set()
121
+ rules: list[dict[str, Any]] = []
122
+
123
+ for violation in violations:
124
+ if violation.rule_id not in seen_rule_ids:
125
+ seen_rule_ids.add(violation.rule_id)
126
+ rules.append(self._create_rule(violation))
127
+
128
+ return rules
129
+
130
+ def _create_rule(self, violation: Violation) -> dict[str, Any]:
131
+ """Create SARIF rule object from violation.
132
+
133
+ Args:
134
+ violation: Violation to extract rule metadata from
135
+
136
+ Returns:
137
+ SARIF rule object with id and shortDescription
138
+ """
139
+ # Extract rule category from rule_id (e.g., "nesting" from "nesting.excessive-depth")
140
+ parts = violation.rule_id.split(".")
141
+ category = parts[0] if parts else violation.rule_id
142
+
143
+ descriptions: dict[str, str] = {
144
+ "file-placement": "File placement violation",
145
+ "nesting": "Nesting depth violation",
146
+ "srp": "Single Responsibility Principle violation",
147
+ "dry": "Don't Repeat Yourself violation",
148
+ "magic-number": "Magic number violation",
149
+ "magic-numbers": "Magic number violation",
150
+ "file-header": "File header violation",
151
+ "print-statements": "Print statement violation",
152
+ }
153
+
154
+ description = descriptions.get(category, f"Rule: {violation.rule_id}")
155
+
156
+ return {
157
+ "id": violation.rule_id,
158
+ "shortDescription": {
159
+ "text": description,
160
+ },
161
+ }
162
+
163
+ def _create_result(self, violation: Violation) -> dict[str, Any]:
164
+ """Create SARIF result object from violation.
165
+
166
+ Args:
167
+ violation: Violation to convert to SARIF result
168
+
169
+ Returns:
170
+ SARIF result object with ruleId, level, message, locations
171
+ """
172
+ # thai-lint uses binary severity (ERROR only), map all to "error" level
173
+ return {
174
+ "ruleId": violation.rule_id,
175
+ "level": "error",
176
+ "message": {
177
+ "text": violation.message,
178
+ },
179
+ "locations": [self._create_location(violation)],
180
+ }
181
+
182
+ def _create_location(self, violation: Violation) -> dict[str, Any]:
183
+ """Create SARIF location object from violation.
184
+
185
+ Args:
186
+ violation: Violation with location information
187
+
188
+ Returns:
189
+ SARIF location object with physicalLocation
190
+ """
191
+ return {
192
+ "physicalLocation": {
193
+ "artifactLocation": {
194
+ "uri": violation.file_path,
195
+ },
196
+ "region": {
197
+ "startLine": violation.line,
198
+ # SARIF uses 1-indexed columns, Violation uses 0-indexed
199
+ "startColumn": violation.column + 1,
200
+ },
201
+ }
202
+ }
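
End-to-end, the formatter is a pure function of the violation list. A sketch of its use, with two explicit assumptions: that Violation is constructible by keyword with the fields named in the cli_utils docstring, and that the Severity enum exposes an ERROR member (the rule id below is also illustrative):

    import json

    from src.core.types import Severity, Violation
    from src.formatters.sarif import SarifFormatter

    violation = Violation(            # keyword construction is an assumption
        rule_id="file-header.missing-field",
        file_path="src/example.py",
        line=1,
        column=0,
        message="Header missing required field: Purpose",
        severity=Severity.ERROR,      # Severity.ERROR member is an assumption
    )

    sarif_doc = SarifFormatter().format([violation])
    print(json.dumps(sarif_doc, indent=2))
    # Expected shape: version "2.1.0", $schema, one run whose tool.driver lists
    # a single "file-header.missing-field" rule, and one result whose region has
    # startLine 1 and startColumn 1 (the 0-indexed column shifted by one).
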
src/linters/file_header/atemporal_detector.py CHANGED
@@ -1,21 +1,21 @@
1
1
  """
2
- File: src/linters/file_header/atemporal_detector.py
3
2
  Purpose: Detects temporal language patterns in file headers
4
- Exports: AtemporalDetector class
5
- Depends: re module for regex matching
6
- Implements: Regex-based pattern matching with configurable patterns
7
- Related: linter.py for detector usage, violation_builder.py for violation creation
8
3
 
9
- Overview:
10
- Implements pattern-based detection of temporal language that violates atemporal
4
+ Scope: File header validation for atemporal language compliance
5
+
6
+ Overview: Implements pattern-based detection of temporal language that violates atemporal
11
7
  documentation requirements. Detects dates, temporal qualifiers, state change language,
12
8
  and future references using regex patterns. Provides violation details for each pattern match.
9
+ Uses four pattern categories (dates, temporal qualifiers, state changes, future references)
10
+ to identify violations and returns detailed information for each match.
11
+
12
+ Dependencies: re module for regex-based pattern matching
13
+
14
+ Exports: AtemporalDetector class with detect_violations method
13
15
 
14
- Usage:
15
- detector = AtemporalDetector()
16
- violations = detector.detect_violations(header_text)
16
+ Interfaces: detect_violations(text) -> list[tuple[str, str, int]] returns pattern matches with line numbers
17
17
 
18
- Notes: Four pattern categories - dates, temporal qualifiers, state changes, future references
18
+ Implementation: Regex-based pattern matching with predefined patterns organized by category
19
19
  """
20
20
 
21
21
  import re
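
The rewrite drops the old Usage block, but the call pattern it documented still holds. A short sketch; the tuple layout is read off the new Interfaces line rather than the implementation:

    from src.linters.file_header.atemporal_detector import AtemporalDetector

    header_text = (
        "Purpose: Parses configuration files\n"
        "Overview: Currently supports YAML; JSON support will be added in 2025.\n"
    )

    detector = AtemporalDetector()
    for match in detector.detect_violations(header_text):
        # Each entry is documented as list[tuple[str, str, int]]: roughly
        # (pattern category, matched text, line number) per match.
        print(match)
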
src/linters/file_header/base_parser.py ADDED
@@ -0,0 +1,89 @@
1
+ """
2
+ Purpose: Base class for file header parsers with common field parsing logic
3
+
4
+ Scope: File header parsing infrastructure for all language-specific parsers
5
+
6
+ Overview: Provides common field parsing functionality shared across all language-specific
7
+ header parsers. Implements the parse_fields method and helper methods for
8
+ detecting field lines and saving fields. Uses template method pattern where subclasses
9
+ implement extract_header for language-specific header extraction while this base class
10
+ handles field parsing logic. Supports multi-line field values and field continuation.
11
+
12
+ Dependencies: re module for field pattern matching, abc module for abstract base class
13
+
14
+ Exports: BaseHeaderParser abstract base class
15
+
16
+ Interfaces: extract_header(code) abstract method, parse_fields(header) -> dict[str, str] for field extraction
17
+
18
+ Implementation: Template method pattern with shared field parsing and language-specific extraction
19
+ """
20
+
21
+ import re
22
+ from abc import ABC, abstractmethod
23
+
24
+
25
+ class BaseHeaderParser(ABC):
26
+ """Base class for file header parsers with common field parsing logic."""
27
+
28
+ # Pattern to match field names (word characters and /)
29
+ FIELD_NAME_PATTERN = re.compile(r"^[\w/]+$")
30
+
31
+ @abstractmethod
32
+ def extract_header(self, code: str) -> str | None:
33
+ """Extract header from source code.
34
+
35
+ Args:
36
+ code: Source code
37
+
38
+ Returns:
39
+ Header content or None if not found
40
+ """
41
+
42
+ def parse_fields(self, header: str) -> dict[str, str]: # thailint: ignore[nesting]
43
+ """Parse structured fields from header text.
44
+
45
+ Args:
46
+ header: Header text
47
+
48
+ Returns:
49
+ Dictionary mapping field_name -> field_value
50
+ """
51
+ fields: dict[str, str] = {}
52
+ current_field: str | None = None
53
+ current_value: list[str] = []
54
+
55
+ for line in header.split("\n"):
56
+ if self._is_field_line(line):
57
+ self._save_field(fields, current_field, current_value)
58
+ current_field, current_value = self._start_new_field(line)
59
+ elif current_field:
60
+ current_value.append(line.strip())
61
+
62
+ self._save_field(fields, current_field, current_value)
63
+ return fields
64
+
65
+ def _is_field_line(self, line: str) -> bool:
66
+ """Check if line starts a new field."""
67
+ if ":" not in line:
68
+ return False
69
+
70
+ colon_pos = line.find(":")
71
+ if colon_pos <= 0:
72
+ return False
73
+
74
+ field_name = line[:colon_pos].strip()
75
+ return bool(self.FIELD_NAME_PATTERN.match(field_name))
76
+
77
+ def _start_new_field(self, line: str) -> tuple[str, list[str]]:
78
+ """Parse a field line and start tracking its value."""
79
+ parts = line.split(":", 1)
80
+ field_name = parts[0].strip()
81
+ initial_value = parts[1].strip() if len(parts) > 1 else ""
82
+ return field_name, [initial_value] if initial_value else []
83
+
84
+ def _save_field(
85
+ self, fields: dict[str, str], field_name: str | None, value_lines: list[str]
86
+ ) -> None:
87
+ """Save accumulated field value to fields dict."""
88
+ if field_name:
89
+ fields[field_name] = "\n".join(value_lines).strip()
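
Because parse_fields lives on the base class, its behaviour can be demonstrated with a trivial subclass that treats the whole input as the header (the subclass is hypothetical; the field splitting shown follows the code above, including multi-line continuation):

    from src.linters.file_header.base_parser import BaseHeaderParser

    class WholeFileHeaderParser(BaseHeaderParser):
        """Hypothetical parser: treats the entire file as the header."""

        def extract_header(self, code: str) -> str | None:
            return code.strip() or None

    header = (
        "Purpose: Demonstrate field parsing\n"
        "Overview: First line of the overview\n"
        "    continued on a second line\n"
        "Exports: Nothing real\n"
    )

    fields = WholeFileHeaderParser().parse_fields(header)
    # {'Purpose': 'Demonstrate field parsing',
    #  'Overview': 'First line of the overview\ncontinued on a second line',
    #  'Exports': 'Nothing real'}
    print(fields)
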
src/linters/file_header/bash_parser.py ADDED
@@ -0,0 +1,58 @@
1
+ """
2
+ Purpose: Bash shell script comment header extraction and parsing
3
+
4
+ Scope: Bash and shell script file header parsing
5
+
6
+ Overview: Extracts hash comment headers from Bash shell scripts. Handles shebang lines
7
+ (#!/bin/bash, #!/usr/bin/env bash, etc.) by skipping them and extracting the
8
+ comment block that follows. Parses structured header fields from comment content.
9
+ Extracts contiguous comment blocks from the start of the file and processes them
10
+ into structured fields for validation.
11
+
12
+ Dependencies: base_parser.BaseHeaderParser for common field parsing functionality
13
+
14
+ Exports: BashHeaderParser class
15
+
16
+ Interfaces: extract_header(code) -> str | None for comment extraction, parse_fields(header) inherited from base
17
+
18
+ Implementation: Skips shebang and preamble, then extracts contiguous hash comment block
19
+ """
20
+
21
+ from src.linters.file_header.base_parser import BaseHeaderParser
22
+
23
+
24
+ class BashHeaderParser(BaseHeaderParser):
25
+ """Extracts and parses Bash file headers from comment blocks."""
26
+
27
+ def extract_header(self, code: str) -> str | None:
28
+ """Extract comment header from Bash script."""
29
+ if not code or not code.strip():
30
+ return None
31
+
32
+ lines = self._skip_preamble(code.split("\n"))
33
+ header_lines = self._extract_comment_block(lines)
34
+
35
+ return "\n".join(header_lines) if header_lines else None
36
+
37
+ def _skip_preamble(self, lines: list[str]) -> list[str]: # thailint: ignore[nesting]
38
+ """Skip shebang and leading empty lines."""
39
+ result = []
40
+ skipping = True
41
+ for line in lines:
42
+ stripped = line.strip()
43
+ if skipping:
44
+ if stripped.startswith("#!") or not stripped:
45
+ continue
46
+ skipping = False
47
+ result.append(stripped)
48
+ return result
49
+
50
+ def _extract_comment_block(self, lines: list[str]) -> list[str]:
51
+ """Extract contiguous comment lines from start of input."""
52
+ result = []
53
+ for line in lines:
54
+ if line.startswith("#"):
55
+ result.append(line[1:].strip())
56
+ else:
57
+ break
58
+ return result
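
Tracing the two helpers above, a shebang plus a contiguous comment block yields a plain-text header that the inherited parse_fields can split into fields. A sketch (the module path mirrors base_parser and is assumed, not confirmed by the diff):

    from src.linters.file_header.bash_parser import BashHeaderParser

    script = (
        "#!/usr/bin/env bash\n"
        "# Purpose: Back up the database\n"
        "# Scope: Nightly maintenance job\n"
        "# Overview: Dumps the primary database and rotates old archives.\n"
        "\n"
        "set -euo pipefail\n"
    )

    parser = BashHeaderParser()
    header = parser.extract_header(script)
    # "Purpose: Back up the database\nScope: Nightly maintenance job\nOverview: ..."
    fields = parser.parse_fields(header or "")
    print(fields["Purpose"])  # "Back up the database"
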