true-formatter 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,28 @@
1
+ from .core import (
2
+ Mode,
3
+ TargetVersion,
4
+ format_str,
5
+ format_file_contents,
6
+ check_format,
7
+ )
8
+ from .cli import main
9
+ from .exceptions import TrueError, TrueFormattingError, TrueConfigError
10
+ from .rules import Rule, RuleSet, DEFAULT_RULES
11
+
12
# Public surface of the package: each name below is re-exported by the
# imports at the top of this module.
__all__ = [
    # formatting entry points
    "format_str",
    "format_file_contents",
    "check_format",
    # command-line entry point
    "main",
    # configuration
    "Mode",
    "TargetVersion",
    # rule system
    "Rule",
    "RuleSet",
    "DEFAULT_RULES",
    # exception hierarchy
    "TrueError",
    "TrueFormattingError",
    "TrueConfigError",
]

# Package metadata.
__version__ = "0.1.0"
__author__ = "True Contributors"
@@ -0,0 +1,7 @@
1
+ from __future__ import annotations
2
+
3
+ from .cli import main
4
+
5
+
6
# Executed for ``python -m true_formatter``: run the CLI and use its return
# value as the process exit status.
if __name__ == "__main__":
    exit_status = main()
    raise SystemExit(exit_status)
true_formatter/cli.py ADDED
@@ -0,0 +1,139 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import sys
5
+ from pathlib import Path
6
+
7
+ from .core import Mode, format_file_contents, check_format
8
+ from .exceptions import TrueFormattingError
9
+
10
+
11
def build_parser() -> argparse.ArgumentParser:
    """Create the argument parser for the ``true`` command-line tool.

    Returns:
        A parser accepting zero or more ``SRC`` positionals plus the flags
        ``--check``, ``--diff``, ``-l/--line-length`` (int, default 88),
        ``-S/--skip-string-normalization`` and ``--quiet/-q``.
    """
    parser = argparse.ArgumentParser(
        prog="true",
        description="True — the uncompromising Python formatter.",
    )
    add = parser.add_argument

    # Positional sources; "*" lets main() detect an empty invocation itself.
    add(
        "src",
        metavar="SRC",
        nargs="*",
        help="Files or directories to format (use '-' for stdin).",
    )

    # Output-mode switches.
    add(
        "--check",
        action="store_true",
        help="Don't write files; exit non-zero if any would change.",
    )
    add(
        "--diff",
        action="store_true",
        help="Show a diff of changes instead of rewriting.",
    )

    # Formatting options.
    add(
        "-l",
        "--line-length",
        type=int,
        default=88,
        metavar="INT",
        help="Maximum line length (default: 88).",
    )
    add(
        "-S",
        "--skip-string-normalization",
        action="store_true",
        help="Don't normalise string quotes.",
    )

    # Verbosity.
    add(
        "--quiet",
        "-q",
        action="store_true",
        help="Suppress all non-error output.",
    )
    return parser
53
+
54
+
55
def main(argv: list[str] | None = None) -> int:
    """Run the command-line interface.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read
            ``sys.argv``.

    Returns:
        The process exit status: ``1`` only when ``--check`` was given and at
        least one source would change, otherwise ``0`` (including when no
        source was named and the help text was printed).
    """
    cli = build_parser()
    options = cli.parse_args(argv)

    mode = Mode(
        line_length=options.line_length,
        skip_string_normalization=options.skip_string_normalization,
    )

    # Nothing to do: show usage rather than silently succeeding.
    if not options.src:
        cli.print_help()
        return 0

    changed = sum(
        _process(source, mode, options) for source in _iter_sources(options.src)
    )
    return 1 if (options.check and changed) else 0
75
+
76
+
77
+ def _iter_sources(srcs: list[str]):
78
+ for s in srcs:
79
+ if s == "-":
80
+ yield "-"
81
+ continue
82
+ p = Path(s)
83
+ if p.is_dir():
84
+ yield from p.rglob("*.py")
85
+ else:
86
+ yield p
87
+
88
+
89
def _process(path, mode: Mode, args) -> int:
    """Format one source and report whether its contents would change.

    Args:
        path: ``"-"`` for standard input, otherwise a filesystem path.
        mode: Formatting options handed to ``format_file_contents``.
        args: Parsed CLI namespace; ``check``, ``diff`` and ``quiet`` are read.

    Returns:
        ``1`` when the formatted text differs from the input, ``0`` when it is
        unchanged or when the source could not be read, formatted or written
        (the problem is then reported on stderr).
    """
    from_stdin = path == "-"
    filename = "<stdin>" if from_stdin else str(path)

    try:
        if from_stdin:
            src = sys.stdin.read()
        else:
            src = Path(path).read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as exc:
        # A missing, unreadable or non-UTF-8 file must not abort the whole
        # run with a traceback; report it like a formatting error.
        print(f"error: {filename}: {exc}", file=sys.stderr)
        return 0

    try:
        formatted = format_file_contents(src, mode=mode)
    except TrueFormattingError as exc:
        print(f"error: {filename}: {exc}", file=sys.stderr)
        return 0

    if formatted == src:
        if from_stdin and not (args.check or args.diff):
            # Filter mode: echo the already-formatted text so that piping
            # through the tool never swallows the input.
            sys.stdout.write(formatted)
        elif not args.quiet:
            print(f"unchanged: {filename}")
        return 0

    if args.check:
        if not args.quiet:
            print(f"would reformat: {filename}")
        return 1

    if args.diff:
        _show_diff(src, formatted, filename)
        return 1

    if from_stdin:
        sys.stdout.write(formatted)
        return 1

    try:
        Path(path).write_text(formatted, encoding="utf-8")
    except OSError as exc:
        print(f"error: {filename}: {exc}", file=sys.stderr)
        return 0
    if not args.quiet:
        print(f"reformatted: {filename}")
    return 1
124
+
125
+
126
+ def _show_diff(original: str, formatted: str, filename: str) -> None:
127
+ import difflib
128
+
129
+ diff = difflib.unified_diff(
130
+ original.splitlines(keepends=True),
131
+ formatted.splitlines(keepends=True),
132
+ fromfile=f"original/{filename}",
133
+ tofile=f"formatted/{filename}",
134
+ )
135
+ sys.stdout.writelines(diff)
136
+
137
+
138
# Allow running this module directly as a script; main()'s return value
# becomes the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
true_formatter/core.py ADDED
@@ -0,0 +1,107 @@
1
+ from __future__ import annotations
2
+
3
+ import tokenize
4
+ import io
5
+ from dataclasses import dataclass, field
6
+ from enum import Enum
7
+ from typing import Iterator
8
+
9
+ from .rules import RuleSet, DEFAULT_RULES
10
+ from .exceptions import TrueFormattingError
11
+
12
+
13
class TargetVersion(Enum):
    """Python versions selectable as formatting targets.

    Each member's value is the ``(major, minor)`` version tuple.
    """

    PY38 = (3, 8)
    PY39 = (3, 9)
    PY310 = (3, 10)
    PY311 = (3, 11)
    PY312 = (3, 12)
19
+
20
+
21
@dataclass
class Mode:
    """Formatting options shared by the library API and the CLI."""

    # Target versions; empty by default.
    target_versions: set[TargetVersion] = field(default_factory=set)
    # Maximum line length; the CLI sets it from -l/--line-length.
    line_length: int = 88
    # String-quote normalisation runs only when this is true AND
    # skip_string_normalization is false.
    string_normalization: bool = True
    magic_trailing_comma: bool = True
    # Set by the CLI's -S/--skip-string-normalization flag.
    skip_string_normalization: bool = False
    # Token-level rules; every Mode shares the module-level DEFAULT_RULES
    # object unless another RuleSet is supplied.
    rules: RuleSet = field(default_factory=lambda: DEFAULT_RULES)

    @property
    def quote_char(self) -> str:
        """Return ``'"'`` when string normalisation is active, else ``"'"``."""
        normalising = self.string_normalization and not self.skip_string_normalization
        return '"' if normalising else "'"
35
+
36
+
37
def format_str(src_contents: str, *, mode: Mode) -> str:
    """Format Python source text and return the result.

    Args:
        src_contents: The source code to format.
        mode: Formatting options, including the token rules to apply.

    Returns:
        The formatted source text.

    Raises:
        TypeError: If *src_contents* is not a ``str``.
        TrueFormattingError: If the source cannot be tokenized.
    """
    if not isinstance(src_contents, str):
        raise TypeError(f"src_contents must be str, got {type(src_contents).__name__!r}")
    try:
        tokens = list(_tokenize(src_contents))
    except (tokenize.TokenError, SyntaxError) as exc:
        # tokenize reports an inconsistent dedent as IndentationError (a
        # SyntaxError subclass) rather than TokenError; both mean the input
        # is not tokenizable and must surface as the package's own error.
        raise TrueFormattingError(f"Cannot tokenize source: {exc}") from exc

    transformed = _apply_rules(tokens, mode)
    return _emit(transformed, src_contents, mode)
48
+
49
+
50
def format_file_contents(src_contents: str, *, mode: Mode) -> str:
    """Format *src_contents* and guarantee the result ends with a newline.

    Delegates to ``format_str`` and appends ``"\\n"`` only when the formatted
    text does not already end with one.
    """
    formatted = format_str(src_contents, mode=mode)
    return formatted if formatted.endswith("\n") else formatted + "\n"
55
+
56
+
57
def check_format(src_contents: str, *, mode: Mode) -> bool:
    """Return True when ``format_str`` leaves *src_contents* unchanged.

    Source that raises ``TrueFormattingError`` counts as not formatted and
    yields False.
    """
    try:
        return format_str(src_contents, mode=mode) == src_contents
    except TrueFormattingError:
        return False
63
+
64
+
65
+ def _tokenize(src: str) -> Iterator[tokenize.TokenInfo]:
66
+ yield from tokenize.generate_tokens(io.StringIO(src).readline)
67
+
68
+
69
+ def _apply_rules(tokens: list[tokenize.TokenInfo], mode: Mode) -> list[tokenize.TokenInfo]:
70
+ result = tokens
71
+ for rule in mode.rules:
72
+ result = rule.apply(result, mode)
73
+ return result
74
+
75
+
76
def _emit(tokens: list[tokenize.TokenInfo], original: str, mode: Mode) -> str:
    """Turn *tokens* back into text and run the text-level formatting passes.

    Best effort: if ``tokenize.untokenize`` fails for any reason, the
    *original* source text is formatted instead.
    """
    try:
        rebuilt = tokenize.untokenize(tokens)
    except Exception:
        # Deliberate fallback: a rule that produced an inconsistent token
        # stream must not prevent the text-level passes from running.
        rebuilt = original
    return _text_level_format(rebuilt, mode)
82
+
83
+
84
def _text_level_format(src: str, mode: Mode) -> str:
    """Apply the text-based transforms to *src* in their fixed order.

    String normalisation runs only when ``mode.string_normalization`` is true
    and ``mode.skip_string_normalization`` is false.
    """
    # Imported here rather than at module level.
    from .transforms import (
        fix_indentation,
        fix_multiple_imports,
        split_semicolons,
        fix_pep8_whitespace,
        fix_operator_priority_spacing,
        normalise_strings,
        fix_trailing_whitespace,
        fix_blank_lines,
        ensure_final_newline,
    )

    structural_passes = (
        fix_indentation,
        fix_multiple_imports,
        split_semicolons,
        fix_pep8_whitespace,
        fix_operator_priority_spacing,
    )
    cleanup_passes = (
        fix_trailing_whitespace,
        fix_blank_lines,
        ensure_final_newline,
    )

    for transform in structural_passes:
        src = transform(src)
    if mode.string_normalization and not mode.skip_string_normalization:
        src = normalise_strings(src, mode.quote_char)
    for transform in cleanup_passes:
        src = transform(src)
    return src
@@ -0,0 +1,13 @@
1
+ from __future__ import annotations
2
+
3
+
4
class TrueError(Exception):
    """Base class of the package's exception hierarchy."""
6
+
7
+
8
class TrueFormattingError(TrueError):
    """Error raised for source that cannot be formatted."""
10
+
11
+
12
class TrueConfigError(TrueError):
    """Configuration-related error in the package's exception hierarchy."""
@@ -0,0 +1,70 @@
1
+ from __future__ import annotations
2
+
3
+ import tokenize
4
+ from abc import ABC, abstractmethod
5
+ from dataclasses import dataclass
6
+ from typing import TYPE_CHECKING
7
+
8
+ if TYPE_CHECKING:
9
+ from .core import Mode
10
+
11
+
12
class Rule(ABC):
    """Abstract base class for token-level formatting rules.

    Subclasses set ``name`` (used by ``RuleSet.remove``) and implement
    ``apply``.
    """

    name: str = "unnamed"

    @abstractmethod
    def apply(
        self,
        tokens: list[tokenize.TokenInfo],
        mode: "Mode",
    ) -> list[tokenize.TokenInfo]:
        """Return the token list that results from applying this rule."""
21
+
22
+
23
class NormaliseCommaSpacing(Rule):
    """Rule registered under the name ``comma-spacing``.

    The current implementation changes nothing: it returns a new list holding
    the same tokens in the same order.
    """

    name = "comma-spacing"

    def apply(self, tokens, mode):
        return list(tokens)
31
+
32
+
33
class NormaliseColonSpacing(Rule):
    """Rule registered under the name ``colon-spacing``.

    The current implementation is a pass-through: the token list is returned
    unchanged (the very same object).
    """

    name = "colon-spacing"

    def apply(self, tokens, mode):
        return tokens
38
+
39
+
40
class RemoveExtraParens(Rule):
    """Rule registered under the name ``extra-parens``.

    The current implementation is a pass-through: the token list is returned
    unchanged (the very same object).
    """

    name = "extra-parens"

    def apply(self, tokens, mode):
        return tokens
45
+
46
+
47
@dataclass
class RuleSet:
    """Ordered collection of rules.

    ``add`` and ``remove`` never modify the receiver; each returns a new
    ``RuleSet``.
    """

    rules: list[Rule]

    def __iter__(self):
        return iter(self.rules)

    def __len__(self):
        return len(self.rules)

    def add(self, rule: Rule) -> "RuleSet":
        """Return a new set with *rule* appended after the existing rules."""
        extended = list(self.rules)
        extended.append(rule)
        return RuleSet(rules=extended)

    def remove(self, name: str) -> "RuleSet":
        """Return a new set without any rule whose ``name`` equals *name*."""
        kept = list(filter(lambda candidate: candidate.name != name, self.rules))
        return RuleSet(rules=kept)
59
+
60
+ def remove(self, name: str) -> "RuleSet":
61
+ return RuleSet(rules=[r for r in self.rules if r.name != name])
62
+
63
+
64
# Rules used when a Mode is created without an explicit rule set, listed in
# the order in which they are applied.
DEFAULT_RULES = RuleSet(
    rules=[NormaliseCommaSpacing(), NormaliseColonSpacing(), RemoveExtraParens()]
)
@@ -0,0 +1,586 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ import tokenize
5
+ import io
6
+ from collections import defaultdict
7
+
8
+ # ─── string normalisation ─────────────────────────────────────────────────────
9
+
10
def normalise_strings(src: str, quote_char: str = '"') -> str:
    """Rewrite the quotes of string literals in *src* to *quote_char*.

    Only ``STRING`` tokens are touched; everything between them is copied
    verbatim. Whether a given literal is actually rewritten is decided by
    ``_rewrite_string``. If *src* cannot be tokenized it is returned
    unchanged.

    Args:
        src: Source text.
        quote_char: Preferred quote character.

    Returns:
        The source text with eligible literals re-quoted.
    """
    # Start offset of every line, computed once. Lines are split on "\n"
    # only, which is how the tokenizer numbers rows.
    line_starts = [0]
    newline = src.find("\n")
    while newline != -1:
        line_starts.append(newline + 1)
        newline = src.find("\n", newline + 1)

    pieces: list[str] = []
    copied_upto = 0
    for tok in _safe_tokenize(src):
        if tok.type != tokenize.STRING:
            continue
        start = line_starts[tok.start[0] - 1] + tok.start[1]
        end = line_starts[tok.end[0] - 1] + tok.end[1]
        pieces.append(src[copied_upto:start])
        pieces.append(_rewrite_string(tok.string, quote_char))
        copied_upto = end
    pieces.append(src[copied_upto:])
    return "".join(pieces)
24
+
25
+
26
+ def _rewrite_string(raw: str, quote_char: str) -> str:
27
+ for triple in ('"""', "'''"):
28
+ if raw.lstrip("rRbBuUfF").startswith(triple):
29
+ return raw
30
+ prefix = ""
31
+ rest = raw
32
+ while rest and rest[0].lower() in "rbuf":
33
+ prefix += rest[0]
34
+ rest = rest[1:]
35
+ if not rest or rest[0] not in ('"', "'"):
36
+ return raw
37
+ current_quote = rest[0]
38
+ if current_quote == quote_char:
39
+ return raw
40
+ inner = rest[1:-1]
41
+ if quote_char in inner:
42
+ return raw
43
+ inner = inner.replace(f"\\{current_quote}", current_quote)
44
+ return f"{prefix}{quote_char}{inner}{quote_char}"
45
+
46
+
47
+ # ─── PEP 8 token-based whitespace fixer ──────────────────────────────────────
48
+
49
+ _OPENING = {"(", "[", "{"}
50
+ _CLOSING = {")", "]", "}"}
51
+
52
+ _BINARY_OPS = {
53
+ "+", "-", "*", "/", "//", "%", "**",
54
+ "==", "!=", "<", ">", "<=", ">=",
55
+ "&", "|", "^", "<<", ">>",
56
+ "+=", "-=", "*=", "/=", "//=", "%=", "**=",
57
+ "&=", "|=", "^=", "<<=", ">>=",
58
+ "->", "@", "@=",
59
+ }
60
+
61
+ _UNARY_PREV_OPS = _OPENING | {
62
+ ",", ";", "=", ":", "+=", "-=", "*=", "/=",
63
+ "//=", "%=", "**=", "&=", "|=", "^=", "<<=", ">>=",
64
+ "==", "!=", "<", ">", "<=", ">=", "&", "|", "^",
65
+ "<<", ">>", "//", "%", "**", "+", "-", "*", "/",
66
+ "return", "yield", "not", "and", "or", "in", "is",
67
+ "lambda", "else", "if",
68
+ }
69
+
70
+ _KEYWORD_BEFORE_PAREN = {
71
+ "if", "else", "elif", "while", "for", "with",
72
+ "assert", "return", "yield", "del", "and", "or",
73
+ "not", "in", "is", "lambda", "class", "def",
74
+ "import", "from", "raise", "except", "as",
75
+ }
76
+
77
+
78
def fix_pep8_whitespace(src: str) -> str:
    """Normalise the whitespace between tokens that share a physical line.

    The source is tokenized, a desired gap is computed for selected token
    pairs (brackets, commas, colons, operators, ``=``), and ``_rebuild``
    writes those gaps back. Gaps for which no rule fires keep their original
    text. Source that cannot be tokenized is returned unchanged.

    Args:
        src: Source text.

    Returns:
        The source text with inter-token whitespace adjusted.
    """
    tokens = _safe_tokenize(src)
    if not tokens:
        return src

    structural = (
        tokenize.ENCODING, tokenize.NL, tokenize.NEWLINE,
        tokenize.INDENT, tokenize.DEDENT, tokenize.ENDMARKER,
    )

    # meaningful: every non-structural token, in order.
    # stmt_start: indexes (into meaningful) of the first code token of each
    # logical line. An operator found there is a prefix, never a binary
    # operator: a decorator "@", a leading "-x", "*a, b = ...".
    meaningful = []
    stmt_start: set[int] = set()
    at_stmt_start = True
    for t in tokens:
        if t.type in structural:
            # NL is a blank line or a line break inside brackets; it does
            # not end the logical line, every other structural token does.
            if t.type != tokenize.NL:
                at_stmt_start = True
            continue
        if at_stmt_start and t.type != tokenize.COMMENT:
            stmt_start.add(len(meaningful))
            at_stmt_start = False
        meaningful.append(t)

    n = len(meaningful)

    # space_before[i] = desired whitespace string before meaningful[i];
    # None means "keep whatever is in the original source".
    space_before: list[str | None] = [None] * n

    # One entry per currently open bracket: "def" (parameter list of a
    # def/class/lambda), "call" (call or subscript) or "other".
    paren_stack: list[str] = []

    def in_params() -> bool:
        return bool(paren_stack) and paren_stack[-1] in ("def", "class")

    def in_brackets() -> bool:
        return bool(paren_stack)

    for i, tok in enumerate(meaningful):
        p = meaningful[i - 1] if i > 0 else None
        nx = meaningful[i + 1] if i + 1 < n else None
        s = tok.string
        tt = tok.type

        # ── track bracket depth ──────────────────────────────────────────
        if s in _OPENING:
            # look two tokens back: def foo( — p is 'foo', pp is 'def'
            pp = meaningful[i - 2] if i >= 2 else None
            if p and p.type == tokenize.NAME and p.string in ("def", "class", "lambda"):
                paren_stack.append("def")
            elif pp and pp.type == tokenize.NAME and pp.string in ("def", "class", "lambda"):
                paren_stack.append("def")
            elif p and p.type == tokenize.NAME and p.string not in _KEYWORD_BEFORE_PAREN:
                paren_stack.append("call")
            elif p and p.type == tokenize.OP and p.string in (")", "]"):
                paren_stack.append("call")
            else:
                paren_stack.append("other")

        if s in _CLOSING and paren_stack:
            paren_stack.pop()

        # ── E201: no space after opening bracket ─────────────────────────
        if s in _OPENING:
            if nx and nx.string not in _CLOSING:
                space_before[i + 1] = ""

        # ── E202: no space before closing bracket ────────────────────────
        if s in _CLOSING:
            if p and p.string not in _OPENING:
                space_before[i] = ""

        # ── E203: no space before , ; ────────────────────────────────────
        if s in (",", ";"):
            space_before[i] = ""

        # ── E231: space after , ; ────────────────────────────────────────
        if s in (",", ";"):
            if nx and nx.string not in _CLOSING and nx.start[0] == tok.start[0]:
                space_before[i + 1] = " "

        # ── E203: no space before colon (dict, class, def, if, etc.) ─────
        if s == ":":
            space_before[i] = ""
            if in_brackets() and paren_stack[-1] == "other":
                if nx and nx.string not in _CLOSING and nx.start[0] == tok.start[0]:
                    space_before[i + 1] = " "

        # ── E211: no space before ( or [ used as call/index ──────────────
        if s in ("(", "["):
            if p and p.type == tokenize.NAME and p.string not in _KEYWORD_BEFORE_PAREN:
                space_before[i] = ""
            elif p and p.type == tokenize.OP and p.string in (")", "]"):
                space_before[i] = ""

        # ── E225: spaces around binary operators ─────────────────────────
        if tt == tokenize.OP and s in _BINARY_OPS:
            is_unary = i in stmt_start
            if not is_unary and s in ("+", "-", "*", "**"):
                is_unary = p is None or (
                    p.type in (tokenize.OP, tokenize.NAME)
                    and p.string in _UNARY_PREV_OPS
                )

            if not is_unary:
                space_before[i] = " "
                if nx and nx.start[0] == tok.start[0]:
                    space_before[i + 1] = " "

        # ── E251/E252: = spacing in params depends on annotation ─────────
        if s == "=" and in_brackets() and in_params():
            if _param_is_annotated(meaningful, i):
                # E252: annotated default → spaces:  def f(x: int = 1)
                space_before[i] = " "
                if nx and nx.start[0] == tok.start[0]:
                    space_before[i + 1] = " "
            else:
                # E251: unannotated default → no spaces:  def f(x=1)
                space_before[i] = ""
                if nx and nx.start[0] == tok.start[0]:
                    space_before[i + 1] = ""

        # ── E251 for call keyword args: foo(x =1) → foo(x=1) ─────────────
        if s == "=" and in_brackets() and paren_stack[-1] == "call":
            space_before[i] = ""
            if nx and nx.start[0] == tok.start[0]:
                space_before[i + 1] = ""

        # ── plain assignment at statement level ──────────────────────────
        if s == "=" and not in_brackets():
            space_before[i] = " "
            if nx and nx.start[0] == tok.start[0]:
                space_before[i + 1] = " "

    return _rebuild(src, meaningful, space_before)
216
+
217
+
218
+ def _rebuild(src: str, meaningful: list, space_before: list[str | None]) -> str:
219
+ # Map start position → desired space before token
220
+ space_map: dict[tuple[int, int], str] = {}
221
+ for i, tok in enumerate(meaningful):
222
+ if space_before[i] is not None:
223
+ space_map[tok.start] = space_before[i]
224
+
225
+ lines = src.splitlines(keepends=True)
226
+ by_line: dict[int, list] = defaultdict(list)
227
+ for tok in _safe_tokenize(src):
228
+ if tok.type not in (tokenize.ENCODING, tokenize.ENDMARKER):
229
+ by_line[tok.start[0]].append(tok)
230
+
231
+ out_lines: list[str] = []
232
+
233
+ for lineno, line in enumerate(lines, 1):
234
+ toks = [
235
+ t for t in by_line.get(lineno, [])
236
+ if t.type not in (
237
+ tokenize.INDENT, tokenize.DEDENT,
238
+ tokenize.NL, tokenize.NEWLINE, tokenize.ENCODING,
239
+ )
240
+ and t.start[0] == lineno
241
+ ]
242
+ if not toks:
243
+ out_lines.append(line)
244
+ continue
245
+
246
+ stripped = line.lstrip()
247
+ indent = line[: len(line) - len(stripped)]
248
+ eol = "\n" if line.endswith("\n") else ""
249
+
250
+ parts: list[str] = [indent]
251
+ for j, tok in enumerate(toks):
252
+ if j == 0:
253
+ parts.append(tok.string)
254
+ continue
255
+
256
+ prev_tok = toks[j - 1]
257
+ original_gap = line[prev_tok.end[1]:tok.start[1]]
258
+ desired = space_map.get(tok.start)
259
+ gap = desired if desired is not None else original_gap
260
+
261
+ # E261/E262: inline comment needs 2 spaces before, "# " after hash
262
+ if tok.type == tokenize.COMMENT:
263
+ gap = " "
264
+ comment = tok.string
265
+ if len(comment) > 1 and comment[1] not in (" ", "!"):
266
+ comment = "# " + comment[1:]
267
+ parts.append(gap)
268
+ parts.append(comment)
269
+ continue
270
+
271
+ parts.append(gap)
272
+ parts.append(tok.string)
273
+
274
+ out_lines.append("".join(parts) + eol)
275
+
276
+ return "".join(out_lines)
277
+
278
+
279
+ # ─── operator spacing fallback (simple regex) ─────────────────────────────────
280
+
281
def fix_operator_spacing(src: str) -> str:
    """Apply the regex-based ``_fix_line`` pass to every line of *src*.

    Line endings are kept, so the result has the same number of lines.
    """
    return "".join(map(_fix_line, src.splitlines(keepends=True)))
287
+
288
+
289
+ def _fix_line(line: str) -> str:
290
+ stripped = line.lstrip()
291
+ if not stripped or stripped.startswith("#"):
292
+ return line
293
+ indent = line[: len(line) - len(line.lstrip())]
294
+ code = stripped.rstrip("\n\r")
295
+ eol = line[len(indent) + len(code):]
296
+ code = re.sub(r"(?<![=!<>+\-*/%&|^])(\s*)=(\s*)(?![=])", lambda m: " = ", code)
297
+ code = re.sub(r"\s*([+\-*/%&|^]=)\s*", r" \1 ", code)
298
+ return indent + code + eol
299
+
300
+
301
+ # ─── indentation ──────────────────────────────────────────────────────────────
302
+
303
def fix_indentation(src: str, indent_unit: str = "    ") -> str:
    """Re-indent *src* with one *indent_unit* per block level.

    The block level is taken from the tokenizer's INDENT/DEDENT tokens.
    Whitespace-only lines become empty lines. Lines that lie inside a
    multi-line token (the interior and closing line of a triple-quoted
    string) are copied verbatim, because their leading whitespace is part of
    the string's value.

    Args:
        src: Source text.
        indent_unit: Text emitted once per block level.

    Returns:
        The re-indented source; *src* itself if it cannot be tokenized.
    """
    try:
        tokens = list(tokenize.generate_tokens(io.StringIO(src).readline))
    except (tokenize.TokenError, SyntaxError):
        # IndentationError (inconsistent dedent) is a SyntaxError subclass.
        return src

    # Newer tokenizers split f-strings into several tokens; these names are
    # absent on older versions, where f-strings are plain STRING tokens.
    fstring_start = getattr(tokenize, "FSTRING_START", None)
    fstring_end = getattr(tokenize, "FSTRING_END", None)

    block_events: dict[int, list[int]] = defaultdict(list)
    frozen_rows: set[int] = set()  # rows that belong to a multi-line token
    open_fstrings: list[int] = []
    for tok in tokens:
        first_row, last_row = tok.start[0], tok.end[0]
        if tok.type in (tokenize.INDENT, tokenize.DEDENT):
            block_events[first_row].append(tok.type)
        elif tok.type == fstring_start:
            open_fstrings.append(first_row)
        elif tok.type == fstring_end and open_fstrings:
            frozen_rows.update(range(open_fstrings.pop() + 1, last_row + 1))
        elif last_row > first_row:
            frozen_rows.update(range(first_row + 1, last_row + 1))

    depth = 0
    out: list[str] = []
    # readlines() splits on "\n" only, matching the tokenizer's row numbers.
    for lineno, line in enumerate(io.StringIO(src).readlines(), 1):
        for kind in block_events.get(lineno, ()):
            if kind == tokenize.INDENT:
                depth += 1
            else:
                depth = max(0, depth - 1)

        if lineno in frozen_rows:
            out.append(line)
            continue

        stripped = line.lstrip()
        eol = "\n" if line.endswith("\n") else ""
        if not stripped:
            out.append(eol)
            continue
        out.append(indent_unit * depth + stripped.rstrip("\n\r") + eol)
    return "".join(out)
330
+
331
+
332
+ # ─── blank lines ──────────────────────────────────────────────────────────────
333
+
334
_TOP_LEVEL_DEF = re.compile(r"^(class |def |async def )")
_METHOD_DEF = re.compile(r"^(def |async def )")


def fix_blank_lines(src: str) -> str:
    """Normalise the blank lines in front of definitions.

    * A ``class``/``def``/``async def`` at column 0 is preceded by exactly
      two blank lines, unless it is the first thing in the output.
    * An indented ``def``/``async def`` is preceded by exactly one blank
      line, unless the previous non-blank line ends with ``:``.

    Single-line decorators directly above a definition stay attached to it;
    the blank lines go in front of the first decorator.

    The pass is line-based and the result is joined with ``"\\n"`` without a
    trailing newline.

    Args:
        src: Source text.

    Returns:
        The source text with blank lines adjusted.
    """
    out: list[str] = []
    for line in src.splitlines():
        stripped = line.lstrip()
        indent = len(line) - len(stripped)
        top_level = indent == 0 and _TOP_LEVEL_DEF.match(stripped) is not None
        method = indent > 0 and _METHOD_DEF.match(stripped) is not None

        if top_level or method:
            # Detach the decorator lines (same indent, starting with "@")
            # that sit right above this definition, ignoring blank lines
            # between them and the definition.
            end = len(out)
            while end > 0 and not out[end - 1].strip():
                end -= 1
            begin = end
            while begin > 0:
                candidate = out[begin - 1]
                text = candidate.lstrip()
                if len(candidate) - len(text) != indent or not text.startswith("@"):
                    break
                begin -= 1
            decorators = out[begin:end]
            if decorators:
                del out[begin:]

            if top_level:
                while out and not out[-1].strip():
                    out.pop()
                if out:
                    out.extend(["", ""])
            else:
                prev_nonblank = next((l for l in reversed(out) if l.strip()), "")
                # No blank line right after the line that opens the body.
                if prev_nonblank and not prev_nonblank.rstrip().endswith(":"):
                    while out and not out[-1].strip():
                        out.pop()
                    if out:
                        out.append("")

            out.extend(decorators)

        out.append(line)
    return "\n".join(out)
365
+
366
+
367
+ # ─── imports ──────────────────────────────────────────────────────────────────
368
+
369
def fix_multiple_imports(src: str) -> str:
    """Split ``import a, b`` lines into one ``import`` statement per module.

    Only lines that start with ``import`` are considered; ``from ... import``
    lines are left alone. Everything from the first ``;`` or ``#`` onwards
    (a following statement or a trailing comment) is kept with the last
    generated line. Lines that end in a backslash continuation or contain an
    empty module name are left untouched.

    The pass is line-based, so a matching line inside a multi-line string is
    rewritten as well.

    Args:
        src: Source text.

    Returns:
        The source text with multi-module imports split.
    """
    out: list[str] = []
    for line in src.splitlines(keepends=True):
        stripped = line.lstrip()
        indent = line[: len(line) - len(stripped)]
        eol = "\n" if line.endswith("\n") else ""
        m = re.match(r"^import\s+(.+)$", stripped.rstrip())
        if m:
            rest = m.group(1)
            # Commas after the first ";" or "#" are not part of the import.
            cuts = [pos for pos in (rest.find(";"), rest.find("#")) if pos != -1]
            cut = min(cuts) if cuts else len(rest)
            head, tail = rest[:cut], rest[cut:]
            names = [n.strip() for n in head.split(",")]
            if len(names) > 1 and all(names) and not rest.endswith("\\"):
                gap = head[len(head.rstrip()):]
                # Generated lines always end in "\n"; only the last one
                # inherits the (possibly missing) ending of the original.
                for name in names[:-1]:
                    out.append(f"{indent}import {name}\n")
                out.append(f"{indent}import {names[-1]}{gap}{tail}{eol}")
                continue
        out.append(line)
    return "".join(out)
385
+
386
+
387
+ # ─── trailing whitespace / newline ────────────────────────────────────────────
388
+
389
def fix_trailing_whitespace(src: str) -> str:
    """Strip trailing whitespace from every line of *src*.

    The lines are re-joined with ``"\\n"``; the result carries no trailing
    newline.
    """
    return "\n".join(map(str.rstrip, src.splitlines()))
391
+
392
+
393
def ensure_final_newline(src: str) -> str:
    """Return *src* ending in exactly one ``"\\n"``.

    Any number of trailing newlines collapses to one; an empty string yields
    a single newline.
    """
    without_trailing_newlines = src.rstrip("\n")
    return f"{without_trailing_newlines}\n"
395
+
396
+
397
+ # ─── helpers ──────────────────────────────────────────────────────────────────
398
+
399
+ def _safe_tokenize(src: str) -> list:
400
+ try:
401
+ return list(tokenize.generate_tokens(io.StringIO(src).readline))
402
+ except tokenize.TokenError:
403
+ return []
404
+
405
+
406
+ def _offset(src: str, rowcol: tuple[int, int]) -> int:
407
+ row, col = rowcol
408
+ lines = src.splitlines(keepends=True)
409
+ return sum(len(lines[r]) for r in range(row - 1)) + col
410
+
411
+
412
+ def _param_is_annotated(meaningful: list, eq_index: int) -> bool:
413
+ """Return True if the = at eq_index belongs to an annotated parameter.
414
+
415
+ Walk backwards from eq_index to find the most recent , or ( at the same
416
+ bracket depth. If we find a : (annotation) before hitting , or (, the
417
+ param is annotated.
418
+ """
419
+ depth = 0
420
+ for j in range(eq_index - 1, -1, -1):
421
+ s = meaningful[j].string
422
+ if s in (")", "]", "}"):
423
+ depth += 1
424
+ elif s in ("(", "[", "{"):
425
+ if depth == 0:
426
+ return False # hit opening paren — no annotation found
427
+ depth -= 1
428
+ elif depth == 0:
429
+ if s == ",":
430
+ return False # hit comma — no annotation between , and =
431
+ if s == ":":
432
+ return True # found annotation colon
433
+ return False
434
+
435
+
436
+ # Priority groups (lower index = lower priority = MORE spaces)
437
+ # PEP8: "in expressions with operators of different priorities, add spaces
438
+ # around the operator with the lowest priority"
439
+ _OP_PRIORITY: dict[str, int] = {
440
+ # lowest priority → spaces preferred
441
+ "or": 0, "and": 1, "not": 2,
442
+ "in": 3, "not in": 3, "is": 3, "is not": 3,
443
+ "<": 4, ">": 4, "<=": 4, ">=": 4, "==": 4, "!=": 4,
444
+ "|": 5, "^": 6, "&": 7,
445
+ "<<": 8, ">>": 8,
446
+ "+": 9, "-": 9, # lower priority arithmetic → spaces
447
+ "*": 10, "/": 10, "//": 10, "%": 10, "@": 10,
448
+ "**": 11, # highest priority → no spaces preferred
449
+ }
450
+
451
+
452
def fix_operator_priority_spacing(src: str) -> str:
    """Run the conservative ``_fix_priority_line`` pass over every line.

    PEP 8 suggests whitespace around the lowest-priority operators of a mixed
    expression and none around the highest-priority ones, for example::

        x = x*2 - 1
        hypot2 = x*x + y*y

    No expression tree is built here: each physical line is handed to
    ``_fix_priority_line`` on its own, and line endings are preserved.
    """
    rewritten = [_fix_priority_line(text) for text in src.splitlines(keepends=True)]
    return "".join(rewritten)
467
+
468
+
469
+ def _fix_priority_line(line: str) -> str:
470
+ stripped = line.lstrip()
471
+ if not stripped or stripped.startswith("#"):
472
+ return line
473
+ # Only touch lines that have at least one * or ** next to a + or -
474
+ # Pattern: space-then-op-then-space on low-priority, no-space on high-priority
475
+ # We just ensure the standard "no space around **" rule
476
+ import re as _re
477
+ # Remove spaces around ** (highest priority, PEP8 example: 2**10)
478
+ # Only when operands are simple (names/numbers)
479
+ line = _re.sub(r"(\w)\s+\*\*\s+(\w)", r"\1**\2", line)
480
+ return line
481
+
482
+
483
def split_semicolons(src: str) -> str:
    """Put every statement on a line of its own (E701/E702).

    ``x = 0; y = 0`` becomes two lines, and ``if True: pass`` becomes a
    header line followed by an indented body line. The splitting itself is
    done per physical line by ``_split_line``.

    Physical lines that continue a logical line — the interior of a
    multi-line string, lines inside open brackets, lines after a backslash —
    are copied verbatim, since they are not statements of their own.

    Args:
        src: Source text.

    Returns:
        The source text with compound lines split.
    """
    # Rows that continue a logical line started on an earlier row.
    continuation_rows: set[int] = set()
    try:
        tokens = list(tokenize.generate_tokens(io.StringIO(src).readline))
    except (tokenize.TokenError, SyntaxError):
        # Not tokenizable as a whole: no row is protected and every line is
        # examined on its own.
        tokens = []

    ignorable = (
        tokenize.COMMENT, tokenize.NL, tokenize.INDENT,
        tokenize.DEDENT, tokenize.ENDMARKER, tokenize.ENCODING,
    )
    first_row = None  # row on which the current logical line started
    last_row = 0
    for tok in tokens:
        if tok.type in ignorable:
            continue
        if tok.type == tokenize.NEWLINE:
            if first_row is not None:
                continuation_rows.update(range(first_row + 1, tok.start[0] + 1))
            first_row = None
            continue
        if first_row is None:
            first_row = tok.start[0]
        last_row = max(last_row, tok.end[0])
    if first_row is not None:
        # Logical line that was not closed by a NEWLINE token.
        continuation_rows.update(range(first_row + 1, last_row + 1))

    out: list[str] = []
    # readlines() splits on "\n" only, matching the tokenizer's row numbers.
    for lineno, line in enumerate(io.StringIO(src).readlines(), 1):
        if lineno in continuation_rows:
            out.append(line)
        else:
            out.extend(_split_line(line))
    return "".join(out)
498
+
499
+
500
+ def _split_line(line: str) -> list[str]: # noqa: C901
501
+ stripped = line.lstrip()
502
+ eol = "\n" if line.endswith("\n") else ""
503
+ indent = line[: len(line) - len(stripped)]
504
+
505
+ # Skip blank lines and comment-only lines
506
+ if not stripped or stripped.startswith("#"):
507
+ return [line]
508
+
509
+ # Tokenize to find semicolons outside strings/brackets
510
+ import tokenize as _tok
511
+ import io as _io
512
+
513
+ try:
514
+ tokens = list(_tok.generate_tokens(_io.StringIO(line).readline))
515
+ except _tok.TokenError:
516
+ return [line]
517
+
518
+ # Find semicolons at depth 0 (not inside brackets)
519
+ depth = 0
520
+ semi_cols: list[int] = []
521
+ for tok in tokens:
522
+ if tok.type == _tok.OP:
523
+ if tok.string in ("(", "[", "{"):
524
+ depth += 1
525
+ elif tok.string in (")", "]", "}"):
526
+ depth -= 1
527
+ elif tok.string == ";" and depth == 0:
528
+ # store relative column (subtract indent length)
529
+ semi_cols.append(tok.start[1] - len(indent))
530
+
531
+ # Handle compound statements FIRST (before semicolons)
532
+ # Handle compound statements: if/for/while/with/else/elif/try/except/finally: body
533
+ # e.g. if x: do_thing()
534
+ _COMPOUND = re.compile(
535
+ r"^(if|elif|else|for|while|with|try|except|finally|class|def|async def|async for|async with)\b"
536
+ )
537
+ if _COMPOUND.match(stripped):
538
+ # Find the colon that ends the header (at depth 0)
539
+ # We want the FIRST colon at depth 0 that is followed by non-empty content
540
+ depth = 0
541
+ colon_col = None
542
+ for tok in tokens:
543
+ if tok.type == _tok.OP:
544
+ if tok.string in ("(", "[", "{"):
545
+ depth += 1
546
+ elif tok.string in (")", "]", "}"):
547
+ depth = max(0, depth - 1)
548
+ elif tok.string == ":" and depth == 0:
549
+ # tok.start[1] is absolute column in line; adjust for indent
550
+ abs_col = tok.start[1]
551
+ rel_col = abs_col - len(indent)
552
+ after = stripped[rel_col + 1:].strip()
553
+ if after and not after.startswith("#"):
554
+ colon_col = rel_col
555
+ break # take the first qualifying colon
556
+ if colon_col is not None:
557
+ header = stripped[: colon_col + 1].rstrip()
558
+ body = stripped[colon_col + 1 :].strip()
559
+ if body and not body.startswith("#"):
560
+ # Body may itself contain semicolons — recurse on body
561
+ body_lines = _split_line(indent + " " + body + eol)
562
+ return [indent + header + eol] + body_lines
563
+
564
+ if semi_cols:
565
+ # Split on semicolons
566
+ result: list[str] = []
567
+ code = stripped.rstrip("\n\r")
568
+ parts = _split_at_cols(code, semi_cols)
569
+ for part in parts:
570
+ part = part.strip()
571
+ if part:
572
+ result.append(indent + part + eol)
573
+ return result
574
+
575
+ return [line]
576
+
577
+
578
+ def _split_at_cols(code: str, semi_cols: list[int]) -> list[str]:
579
+ """Split a string at the given column positions (semicolons)."""
580
+ parts: list[str] = []
581
+ prev = 0
582
+ for col in semi_cols:
583
+ parts.append(code[prev:col])
584
+ prev = col + 1 # skip the semicolon
585
+ parts.append(code[prev:])
586
+ return parts
@@ -0,0 +1,106 @@
1
+ Metadata-Version: 2.4
2
+ Name: true-formatter
3
+ Version: 0.1.0
4
+ Summary: The uncompromising Python formatter.
5
+ Author: True Contributors
6
+ License: MIT
7
+ Requires-Python: >=3.8
8
+ Description-Content-Type: text/markdown
9
+ Provides-Extra: dev
10
+ Requires-Dist: pytest>=7.4; extra == "dev"
11
+ Requires-Dist: pytest-cov>=4.1; extra == "dev"
12
+ Dynamic: author
13
+ Dynamic: description
14
+ Dynamic: description-content-type
15
+ Dynamic: license
16
+ Dynamic: provides-extra
17
+ Dynamic: requires-python
18
+ Dynamic: summary
19
+
20
+ # True — The Uncompromising Python Formatter
21
+
22
+ [![Python](https://img.shields.io/badge/python-3.8%2B-blue)](https://www.python.org)
23
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
24
+
25
+ **True** is an opinionated Python source-code formatter inspired by [Black](https://github.com/psf/black).
26
+ It enforces a consistent style so you never have to think about formatting again.
27
+
28
+ ---
29
+
30
+ ## Features
31
+
32
+ - Normalises string quotes (`'hello'` → `"hello"`)
33
+ - Removes trailing whitespace from every line
34
+ - Enforces two blank lines before top-level `def` / `class`
35
+ - Fixes spacing around `=` and binary operators
36
+ - Guarantees a single trailing newline
37
+ - Pluggable rule system — extend or disable any rule
38
+ - Zero dependencies (pure stdlib)
39
+
40
+ ---
41
+
42
+ ## Installation
43
+
44
+ ```bash
45
+ pip install true-formatter
46
+ ```
47
+
48
+ Or from source:
49
+
50
+ ```bash
51
+ git clone https://github.com/yourname/true-formatter
52
+ cd true-formatter
53
+ pip install -e .[dev]
54
+ ```
55
+
56
+ ---
57
+
58
+ ## Quick start
59
+
60
+ ### As a CLI tool
61
+
62
+ ```bash
63
+ # Format a file in-place
64
+ true my_script.py
65
+
66
+ # Check without modifying
67
+ true --check my_script.py
68
+
69
+ # Show a unified diff
70
+ true --diff my_script.py
71
+
72
+ # Format an entire directory
73
+ true src/
74
+
75
+ # Read from stdin
76
+ echo "x=1" | true -
77
+ ```
78
+
79
+ ### As a library
80
+
81
+ ```python
82
+ import true_formatter
83
+
84
+ code = "x=1\ny= 'hello'\n"
85
+ result = true_formatter.format_str(code, mode=true_formatter.Mode())
86
+ print(result)
87
+ # x = 1
88
+ # y = "hello"
89
+ ```
90
+
91
+ ---
92
+
93
+ ## Documentation
94
+
95
+ | Document | Description |
96
+ |---|---|
97
+ | [API Reference](docs/api.md) | Full public API — `format_str`, `Mode`, `RuleSet`, exceptions |
98
+ | [CLI Reference](docs/cli.md) | All command-line flags and examples |
99
+ | [Architecture](docs/architecture.md) | How the formatter pipeline works internally |
100
+ | [Contributing](docs/contributing.md) | Writing rules, running tests, sending PRs |
101
+
102
+ ---
103
+
104
+ ## License
105
+
106
+ MIT © True Contributors
@@ -0,0 +1,12 @@
1
+ true_formatter/__init__.py,sha256=6vftv8M66XOi21jkRWbm-_ECTCvfMygLnMHE_N1I1YQ,550
2
+ true_formatter/__main__.py,sha256=nbdP6zFlOqcCirgxoIQ6HiWVhdh7URm5GOWr5_EwmBk,116
3
+ true_formatter/cli.py,sha256=y004hnnSUJMBr8CVFiq8ksGdJmDYdMZZ9nF-6i0uxGo,3309
4
+ true_formatter/core.py,sha256=8eEVX8twI1ReHrjfdsOgji5CtCJCrOXOOPzYAXnQfZI,3055
5
+ true_formatter/exceptions.py,sha256=FXEmnDk57L6GNfs_uLi6RHRzybHbJpvNolrXErQMeMo,168
6
+ true_formatter/rules.py,sha256=ONDitsRN3DhqJOXGiVPCnvtFlUpqbBpaMslyqQjQJ5E,1387
7
+ true_formatter/transforms.py,sha256=fA1Za7WT4CtyisMFjcxkCsr0JGJa59_dWBE6HrS5f_c,22380
8
+ true_formatter-0.1.0.dist-info/METADATA,sha256=YZLu1pKCNsCR41U70Am8X2P1CeW5RsT8unOihTN3ChQ,2430
9
+ true_formatter-0.1.0.dist-info/WHEEL,sha256=K260EYznzXsJYBQGqmI8VTxEdiZYNvDZwW9cBh9-_MA,91
10
+ true_formatter-0.1.0.dist-info/entry_points.txt,sha256=VFgjqFP95rPkm_YTaXg4lLZElZTrHJj1U8qguZej0Z4,49
11
+ true_formatter-0.1.0.dist-info/top_level.txt,sha256=RDlRJo9hJQPATlk_9I4IVpLVfCvjH8QG_pXJHui4PaU,15
12
+ true_formatter-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (83.0.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ true = true_formatter.cli:main
@@ -0,0 +1 @@
1
+ true_formatter