thailint 0.5.0__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- src/cli.py +118 -1
- src/core/cli_utils.py +16 -1
- src/core/registry.py +1 -1
- src/formatters/__init__.py +22 -0
- src/formatters/sarif.py +202 -0
- src/linters/file_header/atemporal_detector.py +11 -11
- src/linters/file_header/base_parser.py +89 -0
- src/linters/file_header/bash_parser.py +58 -0
- src/linters/file_header/config.py +76 -16
- src/linters/file_header/css_parser.py +70 -0
- src/linters/file_header/field_validator.py +35 -29
- src/linters/file_header/linter.py +113 -121
- src/linters/file_header/markdown_parser.py +124 -0
- src/linters/file_header/python_parser.py +14 -58
- src/linters/file_header/typescript_parser.py +73 -0
- src/linters/file_header/violation_builder.py +13 -12
- src/linters/file_placement/linter.py +9 -11
- src/linters/print_statements/config.py +7 -12
- src/linters/print_statements/linter.py +13 -15
- src/linters/print_statements/python_analyzer.py +8 -14
- src/linters/print_statements/typescript_analyzer.py +9 -14
- src/linters/print_statements/violation_builder.py +12 -14
- {thailint-0.5.0.dist-info → thailint-0.7.0.dist-info}/METADATA +148 -2
- {thailint-0.5.0.dist-info → thailint-0.7.0.dist-info}/RECORD +27 -20
- {thailint-0.5.0.dist-info → thailint-0.7.0.dist-info}/WHEEL +0 -0
- {thailint-0.5.0.dist-info → thailint-0.7.0.dist-info}/entry_points.txt +0 -0
- {thailint-0.5.0.dist-info → thailint-0.7.0.dist-info}/licenses/LICENSE +0 -0
src/cli.py
CHANGED
|
@@ -38,7 +38,11 @@ logger = logging.getLogger(__name__)
|
|
|
38
38
|
def format_option(func):
|
|
39
39
|
"""Add --format option to a command for output format selection."""
|
|
40
40
|
return click.option(
|
|
41
|
-
"--format",
|
|
41
|
+
"--format",
|
|
42
|
+
"-f",
|
|
43
|
+
type=click.Choice(["text", "json", "sarif"]),
|
|
44
|
+
default="text",
|
|
45
|
+
help="Output format",
|
|
42
46
|
)(func)
|
|
43
47
|
|
|
44
48
|
|
|
@@ -1661,5 +1665,118 @@ def _execute_print_statements_lint( # pylint: disable=too-many-arguments,too-ma
|
|
|
1661
1665
|
sys.exit(1 if print_statements_violations else 0)
|
|
1662
1666
|
|
|
1663
1667
|
|
|
1668
|
+
# File Header Command Helper Functions
|
|
1669
|
+
|
|
1670
|
+
|
|
1671
|
+
def _setup_file_header_orchestrator(
    path_objs: list[Path], config_file: str | None, verbose: bool, project_root: Path | None = None
):
    """Build the Orchestrator instance used by the file-header command.

    Args:
        path_objs: Paths given on the command line; the first one anchors
            project-root detection when no root is supplied.
        config_file: Optional config file path; loaded into the orchestrator
            when truthy.
        verbose: Forwarded to the config loader.
        project_root: Explicit project root. When None, the root is detected
            from the first path (or the current working directory).

    Returns:
        The configured Orchestrator.
    """
    from src.orchestrator.core import Orchestrator
    from src.utils.project_root import get_project_root

    if project_root is None:
        # No explicit root: start detection at the first target, using its
        # parent directory when the target is not itself a directory.
        anchor = path_objs[0] if path_objs else Path.cwd()
        start_dir = anchor if anchor.is_dir() else anchor.parent
        project_root = get_project_root(start_dir)

    orchestrator = Orchestrator(project_root=project_root)
    if config_file:
        _load_config_file(orchestrator, config_file, verbose)
    return orchestrator
|
|
1690
|
+
|
|
1691
|
+
|
|
1692
|
+
def _run_file_header_lint(orchestrator, path_objs: list[Path], recursive: bool):
    """Lint the given paths and keep only file-header violations.

    Args:
        orchestrator: Orchestrator used to run the linting.
        path_objs: Files or directories to lint.
        recursive: Whether directories are scanned recursively.

    Returns:
        List of violations whose rule_id contains "file-header".
    """
    found = _execute_linting_on_paths(orchestrator, path_objs, recursive)
    # Substring match on rule_id: every rule id containing "file-header" is kept.
    return list(filter(lambda item: "file-header" in item.rule_id, found))
|
|
1696
|
+
|
|
1697
|
+
|
|
1698
|
+
@cli.command("file-header")
@click.argument("paths", nargs=-1, type=click.Path())
@click.option("--config", "-c", "config_file", type=click.Path(), help="Path to config file")
@format_option
@click.option("--recursive/--no-recursive", default=True, help="Scan directories recursively")
@click.pass_context
def file_header(
    ctx,
    paths: tuple[str, ...],
    config_file: str | None,
    format: str,
    recursive: bool,
):
    """Check file headers for mandatory fields and atemporal language.

    Validates that source files have proper documentation headers containing
    required fields (Purpose, Scope, Overview, etc.) and don't use temporal
    language (dates, "currently", "now", etc.).

    Supports Python, TypeScript, JavaScript, Bash, Markdown, and CSS files.

    PATHS: Files or directories to lint (defaults to current directory if none provided)

    Examples:

        \b
        # Check current directory (all files recursively)
        thai-lint file-header

        \b
        # Check specific directory
        thai-lint file-header src/

        \b
        # Check single file
        thai-lint file-header src/cli.py

        \b
        # Check multiple files
        thai-lint file-header src/cli.py src/api.py tests/

        \b
        # Get JSON output
        thai-lint file-header --format json .

        \b
        # Get SARIF output for CI/CD integration
        thai-lint file-header --format sarif src/

        \b
        # Use custom config file
        thai-lint file-header --config .thailint.yaml src/
    """
    # NOTE: the docstring above is the command's --help text; the "\b" markers
    # must stay real backspace escapes (non-raw string) so click keeps each
    # example block unwrapped.
    verbose = ctx.obj.get("verbose", False)
    project_root = _get_project_root_from_context(ctx)

    # With no PATHS argument the current directory is linted.
    targets = [Path(entry) for entry in (paths or (".",))]

    try:
        _execute_file_header_lint(targets, config_file, format, recursive, verbose, project_root)
    except Exception as e:
        # SystemExit from the executor is not an Exception subclass, so the
        # exit code it carries passes through untouched.
        _handle_linting_error(e, verbose)
|
|
1764
|
+
|
|
1765
|
+
|
|
1766
|
+
def _execute_file_header_lint(  # pylint: disable=too-many-arguments,too-many-positional-arguments
    path_objs, config_file, format, recursive, verbose, project_root=None
):
    """Run the file-header lint, print the results and exit.

    Args:
        path_objs: Paths to lint; validated for existence first.
        config_file: Optional config file path.
        format: Output format name passed to format_violations.
        recursive: Whether directories are scanned recursively.
        verbose: When true, the violation count is logged.
        project_root: Optional explicit project root.

    Exits the process with status 1 when any violation was found, else 0.
    """
    _validate_paths_exist(path_objs)

    violations = _run_file_header_lint(
        _setup_file_header_orchestrator(path_objs, config_file, verbose, project_root),
        path_objs,
        recursive,
    )

    if verbose:
        logger.info(f"Found {len(violations)} file header violation(s)")

    format_violations(violations, format)

    exit_code = 1 if violations else 0
    sys.exit(exit_code)
|
|
1779
|
+
|
|
1780
|
+
|
|
1664
1781
|
if __name__ == "__main__":
|
|
1665
1782
|
cli()
|
src/core/cli_utils.py
CHANGED
|
@@ -146,10 +146,12 @@ def format_violations(violations: list, output_format: str) -> None:
|
|
|
146
146
|
|
|
147
147
|
Args:
|
|
148
148
|
violations: List of violation objects with rule_id, file_path, line, column, message, severity
|
|
149
|
-
output_format: Output format ("text" or "
|
|
149
|
+
output_format: Output format ("text", "json", or "sarif")
|
|
150
150
|
"""
|
|
151
151
|
if output_format == "json":
|
|
152
152
|
_output_json(violations)
|
|
153
|
+
elif output_format == "sarif":
|
|
154
|
+
_output_sarif(violations)
|
|
153
155
|
else:
|
|
154
156
|
_output_text(violations)
|
|
155
157
|
|
|
@@ -177,6 +179,19 @@ def _output_json(violations: list) -> None:
|
|
|
177
179
|
click.echo(json.dumps(output, indent=2))
|
|
178
180
|
|
|
179
181
|
|
|
182
|
+
def _output_sarif(violations: list) -> None:
    """Print violations as a SARIF v2.1.0 JSON document.

    Args:
        violations: List of violation objects
    """
    # Imported inside the function, as in the other lazily-imported helpers.
    from src.formatters.sarif import SarifFormatter

    document = SarifFormatter().format(violations)
    click.echo(json.dumps(document, indent=2))
|
|
193
|
+
|
|
194
|
+
|
|
180
195
|
def _output_text(violations: list) -> None:
|
|
181
196
|
"""Output violations in human-readable text format.
|
|
182
197
|
|
src/core/registry.py
CHANGED
|
@@ -6,7 +6,7 @@ Scope: Dynamic rule management and discovery across all linter plugin packages
|
|
|
6
6
|
Overview: Implements rule registry that maintains a collection of registered linting rules indexed
|
|
7
7
|
by rule_id. Provides methods to register individual rules, retrieve rules by identifier, list
|
|
8
8
|
all available rules, and discover rules from packages using the RuleDiscovery helper. Enables
|
|
9
|
-
the extensible plugin architecture by allowing
|
|
9
|
+
the extensible plugin architecture by allowing dynamic rule registration without framework
|
|
10
10
|
modifications. Validates rule uniqueness and handles registration errors gracefully.
|
|
11
11
|
|
|
12
12
|
Dependencies: BaseLintRule, RuleDiscovery
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Purpose: SARIF formatter package for thai-lint output
|
|
3
|
+
|
|
4
|
+
Scope: SARIF v2.1.0 formatter implementation and package exports
|
|
5
|
+
|
|
6
|
+
Overview: Formatters package providing SARIF (Static Analysis Results Interchange Format) v2.1.0
|
|
7
|
+
output generation from thai-lint Violation objects. Enables integration with GitHub Code
|
|
8
|
+
Scanning, Azure DevOps, VS Code SARIF Viewer, and other industry-standard CI/CD platforms.
|
|
9
|
+
Provides the SarifFormatter class for converting violations to SARIF JSON documents.
|
|
10
|
+
|
|
11
|
+
Dependencies: sarif module for SarifFormatter class
|
|
12
|
+
|
|
13
|
+
Exports: SarifFormatter class from sarif.py module
|
|
14
|
+
|
|
15
|
+
Interfaces: from src.formatters.sarif import SarifFormatter
|
|
16
|
+
|
|
17
|
+
Implementation: Package initialization with SarifFormatter export
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from src.formatters.sarif import SarifFormatter
|
|
21
|
+
|
|
22
|
+
__all__ = ["SarifFormatter"]
|
src/formatters/sarif.py
ADDED
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Purpose: SARIF v2.1.0 formatter for converting Violation objects to SARIF JSON documents
|
|
3
|
+
|
|
4
|
+
Scope: SARIF document generation, tool metadata, result conversion, location mapping
|
|
5
|
+
|
|
6
|
+
Overview: Implements SarifFormatter class that converts thai-lint Violation objects to SARIF
|
|
7
|
+
(Static Analysis Results Interchange Format) v2.1.0 compliant JSON documents. Produces
|
|
8
|
+
output compatible with GitHub Code Scanning, Azure DevOps, VS Code SARIF Viewer, and
|
|
9
|
+
other industry-standard static analysis tools. Handles proper field mapping including
|
|
10
|
+
1-indexed column conversion, rule metadata deduplication, and tool versioning.
|
|
11
|
+
|
|
12
|
+
Dependencies: src (for __version__), src.core.types (Violation, Severity)
|
|
13
|
+
|
|
14
|
+
Exports: SarifFormatter class with format() method
|
|
15
|
+
|
|
16
|
+
Interfaces: SarifFormatter.format(violations: list[Violation]) -> dict
|
|
17
|
+
|
|
18
|
+
Implementation: Converts Violation objects to SARIF structure with proper indexing and metadata
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from typing import Any
|
|
22
|
+
|
|
23
|
+
from src import __version__
|
|
24
|
+
from src.core.types import Violation
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class SarifFormatter:
    """Formats Violation objects as SARIF v2.1.0 JSON documents.

    SARIF (Static Analysis Results Interchange Format) is the OASIS standard
    for static analysis tool output, enabling integration with GitHub Code
    Scanning, Azure DevOps, and other CI/CD platforms.

    Attributes:
        tool_name: Name of the tool in SARIF output (default: "thai-lint")
        tool_version: Version string for the tool (default: package version)
        information_uri: URL for tool documentation
    """

    SARIF_VERSION = "2.1.0"
    SARIF_SCHEMA = (
        "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/"
        "main/sarif-2.1/schema/sarif-schema-2.1.0.json"
    )
    DEFAULT_INFORMATION_URI = "https://github.com/be-wise-be-kind/thai-lint"

    # Short descriptions keyed by rule category, i.e. the part of a rule_id
    # before the first "." (e.g. "nesting" from "nesting.excessive-depth").
    # Kept at class level so the table is built once, not per rule.
    RULE_DESCRIPTIONS: dict[str, str] = {
        "file-placement": "File placement violation",
        "nesting": "Nesting depth violation",
        "srp": "Single Responsibility Principle violation",
        "dry": "Don't Repeat Yourself violation",
        "magic-number": "Magic number violation",
        "magic-numbers": "Magic number violation",
        "file-header": "File header violation",
        "print-statements": "Print statement violation",
    }

    def __init__(
        self,
        tool_name: str = "thai-lint",
        tool_version: str | None = None,
        information_uri: str | None = None,
    ) -> None:
        """Initialize SarifFormatter with tool metadata.

        Args:
            tool_name: Name of the tool (default: "thai-lint")
            tool_version: Version string (default: package __version__)
            information_uri: URL for tool documentation
                (default: DEFAULT_INFORMATION_URI)
        """
        self.tool_name = tool_name
        self.tool_version = tool_version or __version__
        self.information_uri = information_uri or self.DEFAULT_INFORMATION_URI

    def format(self, violations: list[Violation]) -> dict[str, Any]:
        """Convert violations to SARIF v2.1.0 document.

        Args:
            violations: List of Violation objects to format

        Returns:
            SARIF v2.1.0 compliant dictionary ready for JSON serialization,
            containing a single run.
        """
        return {
            "version": self.SARIF_VERSION,
            "$schema": self.SARIF_SCHEMA,
            "runs": [self._create_run(violations)],
        }

    def _create_run(self, violations: list[Violation]) -> dict[str, Any]:
        """Create a SARIF run object containing tool and results.

        Args:
            violations: List of violations for this run

        Returns:
            SARIF run object with tool and results
        """
        return {
            "tool": self._create_tool(violations),
            "results": [self._create_result(v) for v in violations],
        }

    def _create_tool(self, violations: list[Violation]) -> dict[str, Any]:
        """Create SARIF tool object with driver metadata.

        Args:
            violations: List of violations to extract rule metadata from

        Returns:
            SARIF tool object with driver
        """
        return {
            "driver": {
                "name": self.tool_name,
                "version": self.tool_version,
                "informationUri": self.information_uri,
                "rules": self._create_rules(violations),
            }
        }

    def _create_rules(self, violations: list[Violation]) -> list[dict[str, Any]]:
        """Create deduplicated SARIF rules array from violations.

        Rules appear in the order their rule_id is first seen.

        Args:
            violations: List of violations to extract unique rules from

        Returns:
            List of SARIF rule objects with unique IDs
        """
        seen_rule_ids: set[str] = set()
        rules: list[dict[str, Any]] = []

        for violation in violations:
            if violation.rule_id not in seen_rule_ids:
                seen_rule_ids.add(violation.rule_id)
                rules.append(self._create_rule(violation))

        return rules

    def _create_rule(self, violation: Violation) -> dict[str, Any]:
        """Create SARIF rule object from violation.

        Args:
            violation: Violation to extract rule metadata from

        Returns:
            SARIF rule object with id and shortDescription
        """
        # partition always yields the text before the first "." (or the whole
        # id when there is none), so no empty-result fallback is needed.
        category = violation.rule_id.partition(".")[0]
        description = self.RULE_DESCRIPTIONS.get(category, f"Rule: {violation.rule_id}")

        return {
            "id": violation.rule_id,
            "shortDescription": {
                "text": description,
            },
        }

    def _create_result(self, violation: Violation) -> dict[str, Any]:
        """Create SARIF result object from violation.

        Args:
            violation: Violation to convert to SARIF result

        Returns:
            SARIF result object with ruleId, level, message, locations
        """
        # thai-lint uses binary severity (ERROR only), map all to "error" level
        return {
            "ruleId": violation.rule_id,
            "level": "error",
            "message": {
                "text": violation.message,
            },
            "locations": [self._create_location(violation)],
        }

    def _create_location(self, violation: Violation) -> dict[str, Any]:
        """Create SARIF location object from violation.

        Args:
            violation: Violation with location information

        Returns:
            SARIF location object with physicalLocation
        """
        # SARIF requires startLine and startColumn to be positive integers.
        # NOTE(review): Violation is declared outside this file; the clamps
        # below guard against a file-level violation carrying 0/None for line
        # or column - confirm whether any rule actually produces such values.
        start_line = max(violation.line or 1, 1)
        # SARIF uses 1-indexed columns, Violation uses 0-indexed
        start_column = max(violation.column or 0, 0) + 1

        return {
            "physicalLocation": {
                "artifactLocation": {
                    "uri": violation.file_path,
                },
                "region": {
                    "startLine": start_line,
                    "startColumn": start_column,
                },
            }
        }
|
|
@@ -1,21 +1,21 @@
|
|
|
1
1
|
"""
|
|
2
|
-
File: src/linters/file_header/atemporal_detector.py
|
|
3
2
|
Purpose: Detects temporal language patterns in file headers
|
|
4
|
-
Exports: AtemporalDetector class
|
|
5
|
-
Depends: re module for regex matching
|
|
6
|
-
Implements: Regex-based pattern matching with configurable patterns
|
|
7
|
-
Related: linter.py for detector usage, violation_builder.py for violation creation
|
|
8
3
|
|
|
9
|
-
|
|
10
|
-
|
|
4
|
+
Scope: File header validation for atemporal language compliance
|
|
5
|
+
|
|
6
|
+
Overview: Implements pattern-based detection of temporal language that violates atemporal
|
|
11
7
|
documentation requirements. Detects dates, temporal qualifiers, state change language,
|
|
12
8
|
and future references using regex patterns. Provides violation details for each pattern match.
|
|
9
|
+
Uses four pattern categories (dates, temporal qualifiers, state changes, future references)
|
|
10
|
+
to identify violations and returns detailed information for each match.
|
|
11
|
+
|
|
12
|
+
Dependencies: re module for regex-based pattern matching
|
|
13
|
+
|
|
14
|
+
Exports: AtemporalDetector class with detect_violations method
|
|
13
15
|
|
|
14
|
-
|
|
15
|
-
detector = AtemporalDetector()
|
|
16
|
-
violations = detector.detect_violations(header_text)
|
|
16
|
+
Interfaces: detect_violations(text) -> list[tuple[str, str, int]] returns pattern matches with line numbers
|
|
17
17
|
|
|
18
|
-
|
|
18
|
+
Implementation: Regex-based pattern matching with predefined patterns organized by category
|
|
19
19
|
"""
|
|
20
20
|
|
|
21
21
|
import re
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Purpose: Base class for file header parsers with common field parsing logic
|
|
3
|
+
|
|
4
|
+
Scope: File header parsing infrastructure for all language-specific parsers
|
|
5
|
+
|
|
6
|
+
Overview: Provides common field parsing functionality shared across all language-specific
|
|
7
|
+
header parsers. Implements the parse_fields method and helper methods for
|
|
8
|
+
detecting field lines and saving fields. Uses template method pattern where subclasses
|
|
9
|
+
implement extract_header for language-specific header extraction while this base class
|
|
10
|
+
handles field parsing logic. Supports multi-line field values and field continuation.
|
|
11
|
+
|
|
12
|
+
Dependencies: re module for field pattern matching, abc module for abstract base class
|
|
13
|
+
|
|
14
|
+
Exports: BaseHeaderParser abstract base class
|
|
15
|
+
|
|
16
|
+
Interfaces: extract_header(code) abstract method, parse_fields(header) -> dict[str, str] for field extraction
|
|
17
|
+
|
|
18
|
+
Implementation: Template method pattern with shared field parsing and language-specific extraction
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
import re
|
|
22
|
+
from abc import ABC, abstractmethod
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class BaseHeaderParser(ABC):
|
|
26
|
+
"""Base class for file header parsers with common field parsing logic."""
|
|
27
|
+
|
|
28
|
+
# Pattern to match field names (word characters and /)
|
|
29
|
+
FIELD_NAME_PATTERN = re.compile(r"^[\w/]+$")
|
|
30
|
+
|
|
31
|
+
@abstractmethod
|
|
32
|
+
def extract_header(self, code: str) -> str | None:
|
|
33
|
+
"""Extract header from source code.
|
|
34
|
+
|
|
35
|
+
Args:
|
|
36
|
+
code: Source code
|
|
37
|
+
|
|
38
|
+
Returns:
|
|
39
|
+
Header content or None if not found
|
|
40
|
+
"""
|
|
41
|
+
|
|
42
|
+
def parse_fields(self, header: str) -> dict[str, str]: # thailint: ignore[nesting]
|
|
43
|
+
"""Parse structured fields from header text.
|
|
44
|
+
|
|
45
|
+
Args:
|
|
46
|
+
header: Header text
|
|
47
|
+
|
|
48
|
+
Returns:
|
|
49
|
+
Dictionary mapping field_name -> field_value
|
|
50
|
+
"""
|
|
51
|
+
fields: dict[str, str] = {}
|
|
52
|
+
current_field: str | None = None
|
|
53
|
+
current_value: list[str] = []
|
|
54
|
+
|
|
55
|
+
for line in header.split("\n"):
|
|
56
|
+
if self._is_field_line(line):
|
|
57
|
+
self._save_field(fields, current_field, current_value)
|
|
58
|
+
current_field, current_value = self._start_new_field(line)
|
|
59
|
+
elif current_field:
|
|
60
|
+
current_value.append(line.strip())
|
|
61
|
+
|
|
62
|
+
self._save_field(fields, current_field, current_value)
|
|
63
|
+
return fields
|
|
64
|
+
|
|
65
|
+
def _is_field_line(self, line: str) -> bool:
|
|
66
|
+
"""Check if line starts a new field."""
|
|
67
|
+
if ":" not in line:
|
|
68
|
+
return False
|
|
69
|
+
|
|
70
|
+
colon_pos = line.find(":")
|
|
71
|
+
if colon_pos <= 0:
|
|
72
|
+
return False
|
|
73
|
+
|
|
74
|
+
field_name = line[:colon_pos].strip()
|
|
75
|
+
return bool(self.FIELD_NAME_PATTERN.match(field_name))
|
|
76
|
+
|
|
77
|
+
def _start_new_field(self, line: str) -> tuple[str, list[str]]:
|
|
78
|
+
"""Parse a field line and start tracking its value."""
|
|
79
|
+
parts = line.split(":", 1)
|
|
80
|
+
field_name = parts[0].strip()
|
|
81
|
+
initial_value = parts[1].strip() if len(parts) > 1 else ""
|
|
82
|
+
return field_name, [initial_value] if initial_value else []
|
|
83
|
+
|
|
84
|
+
def _save_field(
|
|
85
|
+
self, fields: dict[str, str], field_name: str | None, value_lines: list[str]
|
|
86
|
+
) -> None:
|
|
87
|
+
"""Save accumulated field value to fields dict."""
|
|
88
|
+
if field_name:
|
|
89
|
+
fields[field_name] = "\n".join(value_lines).strip()
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Purpose: Bash shell script comment header extraction and parsing
|
|
3
|
+
|
|
4
|
+
Scope: Bash and shell script file header parsing
|
|
5
|
+
|
|
6
|
+
Overview: Extracts hash comment headers from Bash shell scripts. Handles shebang lines
|
|
7
|
+
(#!/bin/bash, #!/usr/bin/env bash, etc.) by skipping them and extracting the
|
|
8
|
+
comment block that follows. Parses structured header fields from comment content.
|
|
9
|
+
Extracts contiguous comment blocks from the start of the file and processes them
|
|
10
|
+
into structured fields for validation.
|
|
11
|
+
|
|
12
|
+
Dependencies: base_parser.BaseHeaderParser for common field parsing functionality
|
|
13
|
+
|
|
14
|
+
Exports: BashHeaderParser class
|
|
15
|
+
|
|
16
|
+
Interfaces: extract_header(code) -> str | None for comment extraction, parse_fields(header) inherited from base
|
|
17
|
+
|
|
18
|
+
Implementation: Skips shebang and preamble, then extracts contiguous hash comment block
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from src.linters.file_header.base_parser import BaseHeaderParser
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class BashHeaderParser(BaseHeaderParser):
    """Extracts and parses Bash file headers from comment blocks."""

    def extract_header(self, code: str) -> str | None:
        """Extract comment header from Bash script.

        Args:
            code: Script source text

        Returns:
            The leading hash-comment block with the "#" markers removed and
            lines joined by newlines, or None when the input is blank or no
            comment block follows the preamble.
        """
        if not (code and code.strip()):
            return None

        remaining = self._skip_preamble(code.split("\n"))
        block = self._extract_comment_block(remaining)
        if not block:
            return None
        return "\n".join(block)

    def _skip_preamble(self, lines: list[str]) -> list[str]:  # thailint: ignore[nesting]
        """Skip shebang and leading empty lines.

        Returns every line from the first one that is neither blank nor a
        "#!" line onward, each stripped of surrounding whitespace.
        """
        start = 0
        total = len(lines)
        while start < total:
            text = lines[start].strip()
            if text and not text.startswith("#!"):
                break
            start += 1
        return [entry.strip() for entry in lines[start:]]

    def _extract_comment_block(self, lines: list[str]) -> list[str]:
        """Extract contiguous comment lines from start of input.

        Collection stops at the first line that does not begin with "#";
        the leading "#" is dropped and the remainder stripped.
        """
        block = []
        for text in lines:
            if not text.startswith("#"):
                break
            block.append(text[1:].strip())
        return block
|