wssweep 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wssweep-0.1.0/LICENSE +21 -0
- wssweep-0.1.0/PKG-INFO +124 -0
- wssweep-0.1.0/README.md +101 -0
- wssweep-0.1.0/pyproject.toml +38 -0
- wssweep-0.1.0/setup.cfg +4 -0
- wssweep-0.1.0/src/wssweep/__init__.py +3 -0
- wssweep-0.1.0/src/wssweep/__main__.py +6 -0
- wssweep-0.1.0/src/wssweep/cli.py +172 -0
- wssweep-0.1.0/src/wssweep/core.py +188 -0
- wssweep-0.1.0/src/wssweep/walk.py +152 -0
- wssweep-0.1.0/src/wssweep.egg-info/PKG-INFO +124 -0
- wssweep-0.1.0/src/wssweep.egg-info/SOURCES.txt +14 -0
- wssweep-0.1.0/src/wssweep.egg-info/dependency_links.txt +1 -0
- wssweep-0.1.0/src/wssweep.egg-info/entry_points.txt +2 -0
- wssweep-0.1.0/src/wssweep.egg-info/top_level.txt +1 -0
- wssweep-0.1.0/tests/test_core.py +123 -0
wssweep-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 wssweep contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
wssweep-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: wssweep
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Zero-config whitespace doctor: find (and --fix) trailing whitespace, mixed line endings, missing/extra final newlines, BOMs and mixed indentation. Zero dependencies.
|
|
5
|
+
Author: yyfjj
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/jjdoor/wssweep-py
|
|
8
|
+
Project-URL: Repository, https://github.com/jjdoor/wssweep-py
|
|
9
|
+
Project-URL: Issues, https://github.com/jjdoor/wssweep-py/issues
|
|
10
|
+
Keywords: whitespace,trailing-whitespace,line-endings,crlf,eol,lint,formatter,cli,ci
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Environment :: Console
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Topic :: Software Development :: Quality Assurance
|
|
18
|
+
Classifier: Topic :: Utilities
|
|
19
|
+
Requires-Python: >=3.8
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
License-File: LICENSE
|
|
22
|
+
Dynamic: license-file
|
|
23
|
+
|
|
24
|
+
# wssweep
|
|
25
|
+
|
|
26
|
+
**A zero-config whitespace doctor.** Run it on any repo and it instantly finds —
|
|
27
|
+
and with `--fix`, cleans — the whitespace problems that pollute diffs and break
|
|
28
|
+
across platforms: trailing whitespace, mixed CRLF/LF line endings, a missing
|
|
29
|
+
final newline, extra trailing blank lines, a UTF-8 BOM, lone CRs, and tabs mixed
|
|
30
|
+
with spaces in indentation.
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
pipx run wssweep
|
|
34
|
+
# config.yml (1)
|
|
35
|
+
# - mixed-eol mixed line endings (CRLF×3, LF×1)
|
|
36
|
+
#
|
|
37
|
+
# src/app.js (2)
|
|
38
|
+
# - missing-final-newline no newline at end of file
|
|
39
|
+
# 14: trailing-whitespace trailing whitespace
|
|
40
|
+
#
|
|
41
|
+
# ✖ 3 whitespace issues in 2 files (missing-final-newline=1, mixed-eol=1, trailing-whitespace=1)
|
|
42
|
+
|
|
43
|
+
pipx run wssweep --fix # clean them in place
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
No config file, no framework. Exits non-zero when it finds issues, so it drops
|
|
47
|
+
straight into CI. Pure standard library. Also on npm (`npx wssweep`) — the two
|
|
48
|
+
builds produce **byte-for-byte identical** output *and* identical fixes.
|
|
49
|
+
|
|
50
|
+
## Why another whitespace tool?
|
|
51
|
+
|
|
52
|
+
Because today this takes three or four tools wired together:
|
|
53
|
+
|
|
54
|
+
- **editorconfig-checker** reports, but needs you to author an `.editorconfig`
|
|
55
|
+
first, and it can't fix anything.
|
|
56
|
+
- **pre-commit**'s `trailing-whitespace` / `end-of-file-fixer` /
|
|
57
|
+
`mixed-line-ending` hooks *do* fix — but only inside the pre-commit framework,
|
|
58
|
+
and they're three separate hooks. Nobody runs them ad-hoc on a fresh checkout.
|
|
59
|
+
- **prettier** fixes whitespace only as a side effect of reformatting all your
|
|
60
|
+
code, is language-aware, and won't touch files it can't parse.
|
|
61
|
+
- **dos2unix** only does line endings.
|
|
62
|
+
|
|
63
|
+
`wssweep` is the one command — `pip`/`npx`, zero config — that reports *all*
|
|
64
|
+
seven whitespace smells at once with line numbers and optionally fixes them in
|
|
65
|
+
place, with a clean CI exit code, identical on Python and Node.
|
|
66
|
+
|
|
67
|
+
## What it checks
|
|
68
|
+
|
|
69
|
+
| check | what | `--fix` |
|
|
70
|
+
|---|---|---|
|
|
71
|
+
| `trailing-whitespace` | space/tab at end of a line | trims it |
|
|
72
|
+
| `mixed-eol` | a file containing **both** CRLF and LF | normalizes to LF |
|
|
73
|
+
| `lone-cr` | a bare CR (old-Mac line ending) | normalizes |
|
|
74
|
+
| `missing-final-newline` | non-empty file not ending in a newline | appends one |
|
|
75
|
+
| `trailing-blank-lines` | extra blank line(s) at end of file | collapses to one |
|
|
76
|
+
| `utf8-bom` | a leading UTF-8 BOM | strips it |
|
|
77
|
+
| `mixed-indentation` | tabs **and** spaces in one indent | report-only (needs your tab width) |
|
|
78
|
+
|
|
79
|
+
Opinionated, zero-config defaults: a consistently-CRLF file is **fine** (only
|
|
80
|
+
*mixed* endings are flagged), `.bat`/`.cmd` keep CRLF when fixed, and Markdown's
|
|
81
|
+
two-trailing-spaces hard line break is preserved (trailing-whitespace is skipped
|
|
82
|
+
in `.md` and `.markdown` files).
|
|
83
|
+
|
|
84
|
+
## Usage
|
|
85
|
+
|
|
86
|
+
```bash
|
|
87
|
+
wssweep # scan the current directory
|
|
88
|
+
wssweep src/ docs/ # scan specific paths
|
|
89
|
+
wssweep --fix # fix in place (atomic; only files that change)
|
|
90
|
+
wssweep --crlf --fix # normalize endings to CRLF instead of LF
|
|
91
|
+
wssweep --skip=mixed-indentation # turn off a check
|
|
92
|
+
wssweep --exclude='*.min.js' # skip paths by glob (repeatable)
|
|
93
|
+
wssweep --json # machine output (byte-identical both builds)
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
`.git`, `node_modules`, `dist`, `build`, `vendor`, `.venv` and friends are
|
|
97
|
+
skipped by default, as are binary files (detected by extension + a NUL-byte /
|
|
98
|
+
non-UTF-8 content check) and files over 5 MB. `--all` overrides those skips.
|
|
99
|
+
|
|
100
|
+
Exit codes: `0` clean · `1` issues found · `2` error. (`--fix` exits `0` once
|
|
101
|
+
everything fixable is fixed; a leftover `mixed-indentation` keeps it `1`.)
|
|
102
|
+
|
|
103
|
+
## How it works
|
|
104
|
+
|
|
105
|
+
It reads every file as **raw bytes** and scans a byte-faithful (latin-1) view, so
|
|
106
|
+
it never mangles encodings and the Python and Node builds agree to the byte: line
|
|
107
|
+
endings are classified from the bytes (never `splitlines`, which over-splits),
|
|
108
|
+
"whitespace" means exactly space and tab (never `\s`, which differs across
|
|
109
|
+
languages), and `--fix` writes raw bytes atomically (temp file + rename),
|
|
110
|
+
touching only files that actually change and preserving file modes. Fixing is
|
|
111
|
+
idempotent — run it twice, the second run does nothing.
|
|
112
|
+
|
|
113
|
+
## Install
|
|
114
|
+
|
|
115
|
+
```bash
|
|
116
|
+
pip install wssweep # or pipx run wssweep
|
|
117
|
+
npm i -g wssweep # Node build, identical behaviour
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
Python ≥ 3.8 or Node ≥ 18. No dependencies.
|
|
121
|
+
|
|
122
|
+
## License
|
|
123
|
+
|
|
124
|
+
MIT
|
wssweep-0.1.0/README.md
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
# wssweep
|
|
2
|
+
|
|
3
|
+
**A zero-config whitespace doctor.** Run it on any repo and it instantly finds —
|
|
4
|
+
and with `--fix`, cleans — the whitespace problems that pollute diffs and break
|
|
5
|
+
across platforms: trailing whitespace, mixed CRLF/LF line endings, a missing
|
|
6
|
+
final newline, extra trailing blank lines, a UTF-8 BOM, lone CRs, and tabs mixed
|
|
7
|
+
with spaces in indentation.
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
pipx run wssweep
|
|
11
|
+
# config.yml (1)
|
|
12
|
+
# - mixed-eol mixed line endings (CRLF×3, LF×1)
|
|
13
|
+
#
|
|
14
|
+
# src/app.js (2)
|
|
15
|
+
# - missing-final-newline no newline at end of file
|
|
16
|
+
# 14: trailing-whitespace trailing whitespace
|
|
17
|
+
#
|
|
18
|
+
# ✖ 3 whitespace issues in 2 files (missing-final-newline=1, mixed-eol=1, trailing-whitespace=1)
|
|
19
|
+
|
|
20
|
+
pipx run wssweep --fix # clean them in place
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
No config file, no framework. Exits non-zero when it finds issues, so it drops
|
|
24
|
+
straight into CI. Pure standard library. Also on npm (`npx wssweep`) — the two
|
|
25
|
+
builds produce **byte-for-byte identical** output *and* identical fixes.
|
|
26
|
+
|
|
27
|
+
## Why another whitespace tool?
|
|
28
|
+
|
|
29
|
+
Because today this takes three or four tools wired together:
|
|
30
|
+
|
|
31
|
+
- **editorconfig-checker** reports, but needs you to author an `.editorconfig`
|
|
32
|
+
first, and it can't fix anything.
|
|
33
|
+
- **pre-commit**'s `trailing-whitespace` / `end-of-file-fixer` /
|
|
34
|
+
`mixed-line-ending` hooks *do* fix — but only inside the pre-commit framework,
|
|
35
|
+
and they're three separate hooks. Nobody runs them ad-hoc on a fresh checkout.
|
|
36
|
+
- **prettier** fixes whitespace only as a side effect of reformatting all your
|
|
37
|
+
code, is language-aware, and won't touch files it can't parse.
|
|
38
|
+
- **dos2unix** only does line endings.
|
|
39
|
+
|
|
40
|
+
`wssweep` is the one command — `pip`/`npx`, zero config — that reports *all*
|
|
41
|
+
seven whitespace smells at once with line numbers and optionally fixes them in
|
|
42
|
+
place, with a clean CI exit code, identical on Python and Node.
|
|
43
|
+
|
|
44
|
+
## What it checks
|
|
45
|
+
|
|
46
|
+
| check | what | `--fix` |
|
|
47
|
+
|---|---|---|
|
|
48
|
+
| `trailing-whitespace` | space/tab at end of a line | trims it |
|
|
49
|
+
| `mixed-eol` | a file containing **both** CRLF and LF | normalizes to LF |
|
|
50
|
+
| `lone-cr` | a bare CR (old-Mac line ending) | normalizes |
|
|
51
|
+
| `missing-final-newline` | non-empty file not ending in a newline | appends one |
|
|
52
|
+
| `trailing-blank-lines` | extra blank line(s) at end of file | collapses to one |
|
|
53
|
+
| `utf8-bom` | a leading UTF-8 BOM | strips it |
|
|
54
|
+
| `mixed-indentation` | tabs **and** spaces in one indent | report-only (needs your tab width) |
|
|
55
|
+
|
|
56
|
+
Opinionated, zero-config defaults: a consistently-CRLF file is **fine** (only
|
|
57
|
+
*mixed* endings are flagged), `.bat`/`.cmd` keep CRLF when fixed, and Markdown's
|
|
58
|
+
two-trailing-spaces hard line break is preserved (trailing-whitespace is skipped
|
|
59
|
+
in `.md` and `.markdown` files).
|
|
60
|
+
|
|
61
|
+
## Usage
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
wssweep # scan the current directory
|
|
65
|
+
wssweep src/ docs/ # scan specific paths
|
|
66
|
+
wssweep --fix # fix in place (atomic; only files that change)
|
|
67
|
+
wssweep --crlf --fix # normalize endings to CRLF instead of LF
|
|
68
|
+
wssweep --skip=mixed-indentation # turn off a check
|
|
69
|
+
wssweep --exclude='*.min.js' # skip paths by glob (repeatable)
|
|
70
|
+
wssweep --json # machine output (byte-identical both builds)
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
`.git`, `node_modules`, `dist`, `build`, `vendor`, `.venv` and friends are
|
|
74
|
+
skipped by default, as are binary files (detected by extension + a NUL-byte /
|
|
75
|
+
non-UTF-8 content check) and files over 5 MB. `--all` overrides those skips.
|
|
76
|
+
|
|
77
|
+
Exit codes: `0` clean · `1` issues found · `2` error. (`--fix` exits `0` once
|
|
78
|
+
everything fixable is fixed; a leftover `mixed-indentation` keeps it `1`.)
|
|
79
|
+
|
|
80
|
+
## How it works
|
|
81
|
+
|
|
82
|
+
It reads every file as **raw bytes** and scans a byte-faithful (latin-1) view, so
|
|
83
|
+
it never mangles encodings and the Python and Node builds agree to the byte: line
|
|
84
|
+
endings are classified from the bytes (never `splitlines`, which over-splits),
|
|
85
|
+
"whitespace" means exactly space and tab (never `\s`, which differs across
|
|
86
|
+
languages), and `--fix` writes raw bytes atomically (temp file + rename),
|
|
87
|
+
touching only files that actually change and preserving file modes. Fixing is
|
|
88
|
+
idempotent — run it twice, the second run does nothing.
|
|
89
|
+
|
|
90
|
+
## Install
|
|
91
|
+
|
|
92
|
+
```bash
|
|
93
|
+
pip install wssweep # or pipx run wssweep
|
|
94
|
+
npm i -g wssweep # Node build, identical behaviour
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
Python ≥ 3.8 or Node ≥ 18. No dependencies.
|
|
98
|
+
|
|
99
|
+
## License
|
|
100
|
+
|
|
101
|
+
MIT
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "wssweep"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Zero-config whitespace doctor: find (and --fix) trailing whitespace, mixed line endings, missing/extra final newlines, BOMs and mixed indentation. Zero dependencies."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.8"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
authors = [{ name = "yyfjj" }]
|
|
13
|
+
keywords = ["whitespace", "trailing-whitespace", "line-endings", "crlf", "eol", "lint", "formatter", "cli", "ci"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 4 - Beta",
|
|
16
|
+
"Environment :: Console",
|
|
17
|
+
"Intended Audience :: Developers",
|
|
18
|
+
"License :: OSI Approved :: MIT License",
|
|
19
|
+
"Operating System :: OS Independent",
|
|
20
|
+
"Programming Language :: Python :: 3",
|
|
21
|
+
"Topic :: Software Development :: Quality Assurance",
|
|
22
|
+
"Topic :: Utilities",
|
|
23
|
+
]
|
|
24
|
+
dependencies = []
|
|
25
|
+
|
|
26
|
+
[project.urls]
|
|
27
|
+
Homepage = "https://github.com/jjdoor/wssweep-py"
|
|
28
|
+
Repository = "https://github.com/jjdoor/wssweep-py"
|
|
29
|
+
Issues = "https://github.com/jjdoor/wssweep-py/issues"
|
|
30
|
+
|
|
31
|
+
[project.scripts]
|
|
32
|
+
wssweep = "wssweep.cli:main"
|
|
33
|
+
|
|
34
|
+
[tool.setuptools]
|
|
35
|
+
package-dir = { "" = "src" }
|
|
36
|
+
|
|
37
|
+
[tool.setuptools.packages.find]
|
|
38
|
+
where = ["src"]
|
wssweep-0.1.0/src/wssweep/cli.py
ADDED
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
import signal
|
|
4
|
+
import sys
|
|
5
|
+
|
|
6
|
+
from . import __version__
|
|
7
|
+
from . import core
|
|
8
|
+
from . import walk as walkmod
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _mk_paint(on):
    """Return the palette of colouring callables used for terminal output.

    Args:
        on: When truthy, each callable wraps its argument in the matching ANSI
            SGR escape sequence; otherwise it returns the argument unchanged.

    Returns:
        A dict mapping "red", "green", "yellow", "dim", "bold" and "cyan" to a
        one-argument callable.
    """
    sgr_codes = (
        ("red", "31"),
        ("green", "32"),
        ("yellow", "33"),
        ("dim", "2"),
        ("bold", "1"),
        ("cyan", "36"),
    )

    def painter(code):
        # Bind one SGR code per callable.
        def apply(text):
            if not on:
                return text
            return f"\x1b[{code}m{text}\x1b[0m"
        return apply

    return {name: painter(code) for name, code in sgr_codes}
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _help(p):
|
|
22
|
+
b = p["bold"]
|
|
23
|
+
return (
|
|
24
|
+
f"{b('wssweep')} — find (and --fix) whitespace problems. Zero config, zero dependencies.\n"
|
|
25
|
+
"\n"
|
|
26
|
+
"Reports trailing whitespace, mixed CRLF/LF line endings, a missing final newline,\n"
|
|
27
|
+
"extra trailing blank lines, a UTF-8 BOM, lone CRs, and mixed tabs/spaces in indents.\n"
|
|
28
|
+
"\n"
|
|
29
|
+
f"{b('Usage')}\n"
|
|
30
|
+
" wssweep [path...] Scan paths (default: current directory)\n"
|
|
31
|
+
" wssweep --fix Fix the issues in place (atomic, only changed files)\n"
|
|
32
|
+
" wssweep --json Machine-readable output, for CI\n"
|
|
33
|
+
"\n"
|
|
34
|
+
f"{b('Options')}\n"
|
|
35
|
+
" --fix Rewrite files in place (off by default — report only)\n"
|
|
36
|
+
" --crlf When fixing, normalize line endings to CRLF instead of LF\n"
|
|
37
|
+
" --skip=a,b Skip checks (e.g. --skip=mixed-indentation)\n"
|
|
38
|
+
" --exclude=<glob> Skip paths matching a glob (repeatable; e.g. --exclude='*.min.js')\n"
|
|
39
|
+
" --all Scan vendored dirs + large/binary-extension files too\n"
|
|
40
|
+
" --json JSON output (byte-identical across the Node and Python builds)\n"
|
|
41
|
+
" --quiet Print only the summary line\n"
|
|
42
|
+
" --no-color Disable ANSI color\n"
|
|
43
|
+
" --help | --version\n"
|
|
44
|
+
"\n"
|
|
45
|
+
f"{b('Checks')} trailing-whitespace · mixed-eol · lone-cr · missing-final-newline · trailing-blank-lines · utf8-bom · mixed-indentation\n"
|
|
46
|
+
f"{b('Exit')} 0 clean · 1 issues found · 2 error\n"
|
|
47
|
+
)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def main(argv=None):
    """Parse the command line, scan (and optionally fix) files, print a report.

    Args:
        argv: Arguments without the program name. ``None`` means
            ``sys.argv[1:]``.

    Returns:
        The process exit code: 0 when there is nothing left to report, 1 when
        issues were found (with ``--fix``: when issues outside
        ``core.FIXABLE`` remain), 2 on a usage or I/O error.
    """
    try:
        # Restore the default SIGPIPE action so a closed pipe ends the process
        # instead of surfacing as BrokenPipeError.
        signal.signal(signal.SIGPIPE, signal.SIG_DFL)
    except (AttributeError, ValueError):
        # No SIGPIPE on this platform, or not running in the main thread.
        pass

    argv = list(sys.argv[1:] if argv is None else argv)

    # Everything after a literal "--" is a path, never an option.
    dd_idx = argv.index("--") if "--" in argv else len(argv)
    pre = argv[:dd_idx]

    # Decide colour before anything is printed, so --help and early errors
    # honour --no-color / NO_COLOR as well.
    color_allowed = not os.environ.get("NO_COLOR") and "--no-color" not in pre
    use_color = color_allowed and sys.stdout.isatty()

    def die(msg):
        # Errors go to stderr, so colour follows stderr's TTY state; this keeps
        # escape codes out of redirected error logs.
        err_paint = _mk_paint(color_allowed and sys.stderr.isatty())
        sys.stderr.write(err_paint["red"](f"wssweep: {msg}\n"))
        return 2

    if "-h" in pre or "--help" in pre:
        sys.stdout.write(_help(_mk_paint(use_color)))
        return 0
    if "-v" in pre or "--version" in pre:
        sys.stdout.write(__version__ + "\n")
        return 0

    do_fix = crlf = as_json = quiet = no_color = all_ = False
    roots, excludes = [], []
    skip = set()
    dd = False
    i = 0
    while i < len(argv):
        a = argv[i]
        if dd:
            roots.append(a)
            i += 1
            continue
        if a == "--":
            dd = True
            i += 1
            continue
        # Long options may carry an inline value: --flag=value.
        eq = a.find("=") if a.startswith("--") else -1
        flag = a[:eq] if eq != -1 else a
        inline = a[eq + 1:] if eq != -1 else None

        def take():
            # Value of the current option: the inline part if present,
            # otherwise the next argument ("" when there is none).
            nonlocal i
            if inline is not None:
                return inline
            i += 1
            return argv[i] if i < len(argv) else ""

        if flag == "--fix":
            do_fix = True
        elif flag == "--crlf":
            crlf = True
        elif flag == "--json":
            as_json = True
        elif flag == "--quiet":
            quiet = True
        elif flag == "--no-color":
            no_color = True
        elif flag == "--all":
            all_ = True
        elif flag == "--skip":
            for x in take().split(","):
                x = x.strip()
                if x:
                    skip.add(x)
        elif flag == "--exclude":
            v = take()
            if v:
                excludes.append(v)
        elif a in ("-h", "--help", "-v", "--version"):
            # Already handled before the loop.
            pass
        elif a.startswith("-") and a != "-":
            return die(f"unknown option: {a} (use -- to end options)")
        else:
            roots.append(a)
        i += 1

    if not roots:
        roots.append(".")
    paint = _mk_paint(use_color and not no_color)

    for r in roots:
        if not walkmod.root_exists(r):
            return die(f"cannot access '{r}'")

    try:
        files = walkmod.collect_files(roots, {"all": all_, "exclude": excludes})
    except OSError as e:
        return die(str(e))

    records = []
    files_scanned = 0
    for f in files:
        buf = walkmod.read_file_bytes(f, all_)
        if buf is None:
            # The reader declined this file; it does not count as scanned.
            continue
        files_scanned += 1
        ext = walkmod.ext_of(f)
        s = walkmod.bytes_to_latin1(buf)
        issues = [iss for iss in core.analyze(s, ext) if iss["check"] not in skip]
        if do_fix and core.has_fixable_issue(issues):
            out = core.fix(s, ext, {"crlf": crlf, "skip": skip})
            if out != s:
                # Only rewrite files whose bytes actually change.
                try:
                    walkmod.write_file_atomic(f, walkmod.latin1_to_bytes(out))
                except OSError as e:
                    return die(str(e))
        posix = walkmod.to_posix(f)
        for iss in issues:
            records.append({"file": posix, "check": iss["check"], "line": iss["line"], "message": iss["message"]})

    records = core.sort_file_issues(records)
    summary = core.summarize(records, files_scanned)

    if as_json:
        sys.stdout.write(json.dumps(core.to_json(records, summary), indent=2, ensure_ascii=False) + "\n")
    elif do_fix:
        remaining = [r for r in records if r["check"] not in core.FIXABLE]
        sys.stdout.write(core.format_report(records, summary, paint, {"quiet": quiet, "fixed": True}) + "\n")
        if remaining:
            sys.stdout.write(paint["yellow"](f" {len(remaining)} issue(s) need manual fixing (not auto-fixable)") + "\n")
    else:
        sys.stdout.write(core.format_report(records, summary, paint, {"quiet": quiet}) + "\n")

    if do_fix:
        # After fixing, only issues outside core.FIXABLE keep the exit code at 1.
        return 1 if any(r["check"] not in core.FIXABLE for r in records) else 0
    return 1 if records else 0
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
"""wssweep core — pure whitespace detection & fixing. No fs, no walking.
|
|
2
|
+
|
|
3
|
+
Everything operates on a LATIN-1 (byte-faithful) string: each byte 0x00–0xFF maps
|
|
4
|
+
to one code point, reversibly, so the Node and Python builds see the exact same
|
|
5
|
+
bytes and emit byte-for-byte identical output. We never use \\s (Unicode-divergent),
|
|
6
|
+
never str.splitlines (over-splits on VT/FF/NEL/LS/PS), and never a text-mode reader
|
|
7
|
+
(would translate newlines). The CLI converts file bytes <-> latin1 string at the IO
|
|
8
|
+
boundary. Trailing-ws regex uses \\Z (not $) to match JS's end-of-string $ exactly.
|
|
9
|
+
|
|
10
|
+
Checks (all default on): trailing-whitespace, mixed-eol, lone-cr,
|
|
11
|
+
missing-final-newline, trailing-blank-lines, utf8-bom, mixed-indentation.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import re
|
|
15
|
+
|
|
16
|
+
BOM = "\xef\xbb\xbf" # UTF-8 BOM EF BB BF in latin1
|
|
17
|
+
CRLF_EXTS = {"bat", "cmd", "ps1", "sln", "csproj", "vcxproj", "props"}
|
|
18
|
+
MD_EXTS = {"md", "markdown"}
|
|
19
|
+
|
|
20
|
+
CHECK_ORDER = {
|
|
21
|
+
"utf8-bom": 0, "mixed-eol": 1, "lone-cr": 2, "missing-final-newline": 3,
|
|
22
|
+
"trailing-blank-lines": 4, "trailing-whitespace": 5, "mixed-indentation": 6,
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
_EOL_RE = re.compile(r"\r\n|\r|\n")
|
|
26
|
+
_TRAIL_RE = re.compile(r"[ \t]+\Z")
|
|
27
|
+
_BLANK_RE = re.compile(r"[ \t]*\Z")
|
|
28
|
+
_INDENT_RE = re.compile(r"[ \t]*")
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def count_sub(s, sub):
|
|
32
|
+
return s.count(sub) if sub else 0
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def split_physical_lines(s):
|
|
36
|
+
return _EOL_RE.split(s)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def analyze(s, ext):
|
|
40
|
+
issues = []
|
|
41
|
+
is_md = ext in MD_EXTS
|
|
42
|
+
has_bom = s.startswith(BOM)
|
|
43
|
+
body = s[3:] if has_bom else s
|
|
44
|
+
if has_bom:
|
|
45
|
+
issues.append({"check": "utf8-bom", "line": None, "message": "UTF-8 BOM"})
|
|
46
|
+
|
|
47
|
+
crlf = count_sub(body, "\r\n")
|
|
48
|
+
lone_lf = count_sub(body, "\n") - crlf
|
|
49
|
+
lone_cr = count_sub(body, "\r") - crlf
|
|
50
|
+
if crlf > 0 and lone_lf > 0:
|
|
51
|
+
issues.append({"check": "mixed-eol", "line": None, "message": f"mixed line endings (CRLF×{crlf}, LF×{lone_lf})"})
|
|
52
|
+
if lone_cr > 0:
|
|
53
|
+
issues.append({"check": "lone-cr", "line": None, "message": f"{lone_cr} bare CR"})
|
|
54
|
+
|
|
55
|
+
lines = split_physical_lines(body)
|
|
56
|
+
ends_with_newline = len(body) > 0 and lines[-1] == ""
|
|
57
|
+
|
|
58
|
+
for i, line in enumerate(lines):
|
|
59
|
+
if not is_md and _TRAIL_RE.search(line):
|
|
60
|
+
issues.append({"check": "trailing-whitespace", "line": i + 1, "message": "trailing whitespace"})
|
|
61
|
+
indent = _INDENT_RE.match(line).group(0)
|
|
62
|
+
if " " in indent and "\t" in indent:
|
|
63
|
+
issues.append({"check": "mixed-indentation", "line": i + 1, "message": "mixed tabs and spaces in indent"})
|
|
64
|
+
|
|
65
|
+
if len(body) > 0 and not body.endswith("\n") and not body.endswith("\r"):
|
|
66
|
+
issues.append({"check": "missing-final-newline", "line": None, "message": "no newline at end of file"})
|
|
67
|
+
|
|
68
|
+
if ends_with_newline:
|
|
69
|
+
content = lines[:-1]
|
|
70
|
+
k = 0
|
|
71
|
+
for j in range(len(content) - 1, -1, -1):
|
|
72
|
+
if _BLANK_RE.fullmatch(content[j]):
|
|
73
|
+
k += 1
|
|
74
|
+
else:
|
|
75
|
+
break
|
|
76
|
+
if k >= 1:
|
|
77
|
+
issues.append({"check": "trailing-blank-lines", "line": None, "message": f"{k} trailing blank line{'' if k == 1 else 's'}"})
|
|
78
|
+
|
|
79
|
+
issues.sort(key=lambda a: (a["line"] if a["line"] is not None else 0, CHECK_ORDER[a["check"]]))
|
|
80
|
+
return issues
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
# Checks that fix() can repair; mixed-indentation is deliberately absent.
FIXABLE = {
    "utf8-bom",
    "mixed-eol",
    "lone-cr",
    "missing-final-newline",
    "trailing-blank-lines",
    "trailing-whitespace",
}


def has_fixable_issue(issues):
    """Return True when at least one issue's check is in ``FIXABLE``."""
    for issue in issues:
        if issue["check"] in FIXABLE:
            return True
    return False
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def fix(s, ext, opts=None):
    """Return the cleaned-up version of one file's content.

    Args:
        s: File content as a latin-1 (one code point per byte) string.
        ext: Lower-case file extension without the dot.
        opts: Optional dict. ``"crlf"`` (truthy) forces CRLF output. ``"skip"``
            is a collection of check names; only ``"utf8-bom"`` and
            ``"trailing-whitespace"`` are consulted here. Line-ending
            normalisation, the final newline and trailing-blank-line removal
            are always applied.

    Returns:
        The fixed content, or ``""`` when nothing but blank lines remains.
    """
    options = opts or {}
    skipped = options.get("skip") or set()

    body = s
    if s.startswith(BOM) and "utf8-bom" not in skipped:
        body = s[3:]
    if not body:
        return ""

    # Pick the output terminator: CRLF when requested, when the extension
    # demands it, or when the file already uses CRLF exclusively.
    n_crlf = count_sub(body, "\r\n")
    n_lf = count_sub(body, "\n") - n_crlf
    n_cr = count_sub(body, "\r") - n_crlf
    already_crlf = n_crlf > 0 and n_lf == 0 and n_cr == 0
    if options.get("crlf") or ext in CRLF_EXTS or already_crlf:
        eol = "\r\n"
    else:
        eol = "\n"

    pieces = split_physical_lines(body)
    if pieces[-1] == "":
        # Drop the empty piece that follows the final terminator.
        pieces = pieces[:-1]

    if ext in MD_EXTS or "trailing-whitespace" in skipped:
        kept = list(pieces)
    else:
        kept = [_TRAIL_RE.sub("", piece) for piece in pieces]

    # Remove blank lines at the end of the file.
    while kept and _BLANK_RE.fullmatch(kept[-1]):
        kept.pop()

    if not kept:
        return ""
    return eol.join(kept) + eol
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
# ---- rendering -------------------------------------------------------------
|
|
120
|
+
|
|
121
|
+
# No-op palette: same keys as a real paint dict, every callable returns its
# argument unchanged.
PLAIN = {
    name: (lambda s: s)
    for name in ("red", "green", "yellow", "dim", "bold", "cyan")
}
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def sort_file_issues(records):
    """Return ``records`` as a new list in report order.

    Records are ordered by the UTF-8 bytes of their file path, then by line
    (a ``None`` line sorts as 0), then by ``CHECK_ORDER`` rank.
    """
    def report_key(rec):
        line = rec["line"]
        return (
            rec["file"].encode("utf-8"),
            0 if line is None else line,
            CHECK_ORDER[rec["check"]],
        )

    return sorted(records, key=report_key)
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def summarize(records, files_scanned):
    """Build the summary dict for a run.

    Args:
        records: Issue records, each with at least "file" and "check".
        files_scanned: Number of files that were read and analysed.

    Returns:
        A dict with "filesScanned", "filesWithIssues", "issueCount" and
        "byType" (check name -> count, in order of first appearance).
    """
    per_check = {}
    paths = set()
    for rec in records:
        name = rec["check"]
        per_check.setdefault(name, 0)
        per_check[name] += 1
        paths.add(rec["file"])
    return {
        "filesScanned": files_scanned,
        "filesWithIssues": len(paths),
        "issueCount": len(records),
        "byType": per_check,
    }
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def to_json(records, summary):
    """Shape records and summary into the structure emitted by ``--json``.

    Issues are grouped per file; files appear in the order their first record
    appears, and issues keep their record order.
    """
    grouped = {}
    for rec in records:
        entry = {"check": rec["check"], "line": rec["line"], "message": rec["message"]}
        # dicts keep insertion order, so first-seen file order is preserved.
        grouped.setdefault(rec["file"], []).append(entry)
    file_entries = [{"path": path, "issues": found} for path, found in grouped.items()]
    return {"version": 1, "summary": summary, "files": file_entries}
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def format_report(records, summary, paint=None, opts=None):
    """Render the human-readable report.

    Args:
        records: Issue records already in report order (grouped by file).
        summary: Summary dict with "filesScanned", "filesWithIssues",
            "issueCount" and "byType".
        paint: Palette of colouring callables; defaults to ``PLAIN``.
        opts: Optional dict. ``"quiet"`` suppresses the per-file listing;
            ``"fixed"`` switches to the after-``--fix`` wording.

    Returns:
        The report as one string without a trailing newline.
    """
    p = paint or PLAIN
    opts = opts or {}
    quiet = opts.get("quiet")
    fixed = opts.get("fixed")
    if not records:
        n = summary["filesScanned"]
        return p["green"]("✓ no whitespace issues") + f" ({n} file{'' if n == 1 else 's'} scanned)"
    lines = []
    if not quiet:
        # Count issues per file once, instead of rescanning every record for
        # each file header.
        per_file = {}
        for r in records:
            per_file[r["file"]] = per_file.get(r["file"], 0) + 1
        current = None
        for r in records:
            if r["file"] != current:
                if current is not None:
                    lines.append("")  # blank line between file groups
                current = r["file"]
                n = per_file[current]
                lines.append(p["bold"](current) + p["dim"](f" ({n})"))
            # File-level issues have no line number and get a dash instead.
            loc = p["dim"](" - ") if r["line"] is None else p["dim"](str(r["line"]).rjust(4) + ":")
            verb = p["green"]("fixed ") if (fixed and r["check"] in FIXABLE) else ""
            lines.append(f" {loc} {verb}{p['cyan'](r['check'])} {p['dim'](r['message'])}")
        lines.append("")
    if fixed:
        # Count only the issues actually auto-fixed (report-only checks like
        # mixed-indentation never change bytes and aren't "fixed").
        fix_recs = [r for r in records if r["check"] in FIXABLE]
        fc = len(fix_recs)
        ff = len({r["file"] for r in fix_recs})
        by_type = {}
        for r in fix_recs:
            by_type[r["check"]] = by_type.get(r["check"], 0) + 1
        types = ", ".join(f"{k}={by_type[k]}" for k in sorted(by_type))
        lines.append(p["green"](f"✓ fixed {fc} issue{'' if fc == 1 else 's'} in {ff} file{'' if ff == 1 else 's'}") + (p["dim"](f" ({types})") if types else ""))
    else:
        ic = summary["issueCount"]
        fi = summary["filesWithIssues"]
        types = ", ".join(f"{k}={summary['byType'][k]}" for k in sorted(summary["byType"]))
        lines.append(p["red"](f"✖ {ic} whitespace issue{'' if ic == 1 else 's'} in {fi} file{'' if fi == 1 else 's'}") + p["dim"](f" ({types})"))
    return "\n".join(lines)
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
"""wssweep IO — directory walking and raw-byte file reading. No whitespace logic.
|
|
2
|
+
|
|
3
|
+
Both builds must discover the SAME file set (same ignore dirs, same binary skip by
|
|
4
|
+
extension + content, same symlink policy, same size cap, same glob exclude) and read
|
|
5
|
+
RAW BYTES (never text mode, which would translate newlines).
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import os
|
|
9
|
+
import re
|
|
10
|
+
import stat as _stat
|
|
11
|
+
|
|
12
|
+
# Directory names collect_files() refuses to descend into (unless "all" is set).
IGNORE_DIRS = {
    # version-control metadata
    ".git", ".svn", ".hg",
    # dependency trees and build output
    "node_modules", "vendor", "dist", "build", "out", "target",
    # virtualenvs, tool state and caches
    ".venv", "venv", ".tox", "coverage", ".next", ".nuxt", ".cache",
}

# Lower-case extensions (no leading dot) that collect_files() treats as binary
# and skips (unless "all" is set).
BINARY_EXTS = {
    # images and documents
    "png", "jpg", "jpeg", "gif", "webp", "ico", "bmp", "tiff", "pdf",
    # archives and packaged code
    "zip", "gz", "tgz", "tar", "bz2", "xz", "7z", "rar", "jar", "war", "wasm",
    # fonts
    "woff", "woff2", "ttf", "otf", "eot",
    # video and audio
    "mp4", "mov", "avi", "webm", "mkv", "mp3", "wav", "flac", "ogg",
    # executables, libraries and object code
    "exe", "dll", "so", "dylib", "o", "a", "class",
    # opaque data, bytecode and lock files
    "bin", "dat", "db", "sqlite", "pyc", "pyo", "node", "lock",
}

# Largest file, in bytes, that read_file_bytes() accepts by default (5 MiB).
SIZE_CAP = 5 << 20

# Characters glob_to_re() backslash-escapes so they match literally.
_GLOB_SPECIAL = set(r"\^$.|+()[]{}")
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def to_posix(p):
    """Return ``p`` with ``os.sep`` turned into ``/`` and one leading ``./`` removed."""
    normalized = "/".join(p.split(os.sep))
    if normalized.startswith("./"):
        return normalized[2:]
    return normalized
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def ext_of(p):
    """Return the lower-cased extension of ``p`` without its dot ("" when there is none)."""
    _stem, suffix = os.path.splitext(p)
    return suffix[1:].lower()
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def root_exists(p):
    """Return True when ``os.stat(p)`` succeeds, False when it raises ``OSError``."""
    try:
        os.stat(p)
    except OSError:
        return False
    return True
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def glob_to_re(glob):
    """Compile a glob into a regular expression.

    ``**`` becomes ``.*`` (may cross ``/``), a single ``*`` becomes ``[^/]*``,
    ``?`` becomes ``[^/]``, characters in ``_GLOB_SPECIAL`` are backslash-escaped
    and everything else is copied through unchanged.  The pattern is not
    anchored here; callers decide how to match it.
    """
    pieces = []
    pos, end = 0, len(glob)
    while pos < end:
        ch = glob[pos]
        pos += 1
        if ch == "*":
            if pos < end and glob[pos] == "*":
                # Consume the second star of a "**" pair.
                pos += 1
                pieces.append(".*")
            else:
                pieces.append("[^/]*")
            continue
        if ch == "?":
            pieces.append("[^/]")
            continue
        pieces.append("\\" + ch if ch in _GLOB_SPECIAL else ch)
    return re.compile("".join(pieces))
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def make_excluder(patterns):
    """Build a predicate telling whether a POSIX-style path matches any exclude glob.

    Empty patterns are dropped.  A glob without ``/`` is matched against the
    last path component only; a glob containing ``/`` is matched against the
    whole path.  Matching is always a full match.
    """
    matchers = []
    for pattern in patterns or []:
        if not pattern:
            continue
        matchers.append((glob_to_re(pattern), "/" not in pattern))

    def excluded(posix_path):
        basename = posix_path.split("/")[-1]
        for regex, basename_only in matchers:
            subject = basename if basename_only else posix_path
            if regex.fullmatch(subject):
                return True
        return False

    return excluded
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def collect_files(roots, opts=None):
    """Walk ``roots`` and return the regular files to scan, in visit order.

    ``opts`` is an optional mapping; the keys read here are ``"all"`` (truthy
    disables the ``IGNORE_DIRS`` and ``BINARY_EXTS`` skips) and ``"exclude"``
    (glob patterns handed to ``make_excluder``).

    Roots are examined with ``os.stat`` (so a root may be a symlink); every
    entry below a root is examined with ``os.lstat`` and skipped when it is a
    symlink.  Exclude globs apply to entries below a root, never to a root
    itself.  Paths that cannot be stat'ed or listed are silently skipped, as
    is anything that is neither a directory nor a regular file.
    """
    opts = opts or {}
    all_ = bool(opts.get("all"))
    excluded = make_excluder(opts.get("exclude"))
    out = []

    def sort_key(name):
        # os.listdir() yields lone surrogates for undecodable file names; a
        # strict UTF-8 encode would raise UnicodeEncodeError and abort the
        # walk.  "surrogatepass" never raises and produces the same bytes as
        # strict UTF-8 for every well-formed name, so ordering is unchanged.
        return name.encode("utf-8", "surrogatepass")

    def visit(p, is_root):
        try:
            st = os.stat(p) if is_root else os.lstat(p)
        except OSError:
            return
        if not is_root and _stat.S_ISLNK(st.st_mode):
            return
        posix = to_posix(p)
        if _stat.S_ISDIR(st.st_mode):
            if not is_root and ((not all_ and os.path.basename(p) in IGNORE_DIRS) or excluded(posix)):
                return
            try:
                entries = os.listdir(p)
            except OSError:
                return
            # Sort by encoded bytes so the order does not depend on the
            # platform's listing order.
            entries.sort(key=sort_key)
            for e in entries:
                visit(os.path.join(p, e), False)
        elif _stat.S_ISREG(st.st_mode):
            if not is_root and excluded(posix):
                return
            if not all_ and ext_of(p) in BINARY_EXTS:
                return
            out.append(p)

    for r in roots:
        visit(r, True)
    return out
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def read_file_bytes(p, all_):
    """Return the raw bytes of ``p``, or ``None`` when the file should be skipped.

    ``None`` is returned when the file cannot be opened or read, when it is
    larger than ``SIZE_CAP`` bytes (only enforced while ``all_`` is falsy),
    when a NUL byte occurs in the first 8192 bytes, or when the content is not
    valid UTF-8.  The file is always opened in binary mode, so line endings
    reach the caller untouched.
    """
    try:
        with open(p, "rb") as fh:
            if all_:
                buf = fh.read()
            else:
                # One byte past the cap is enough to know the file is too big,
                # without pulling an arbitrarily large file into memory.
                buf = fh.read(SIZE_CAP + 1)
    except OSError:
        return None
    if not all_ and len(buf) > SIZE_CAP:
        return None
    # A NUL near the start is treated as a sign of binary content.
    if b"\x00" in buf[:8192]:
        return None
    try:
        buf.decode("utf-8")
    except UnicodeDecodeError:
        return None
    return buf
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def bytes_to_latin1(buf):
    """Decode ``buf`` as latin-1, giving one character per input byte."""
    text = str(buf, "latin-1")
    return text
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def latin1_to_bytes(s):
    """Encode ``s`` as latin-1, giving one byte per character."""
    raw = bytes(s, "latin-1")
    return raw
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def write_file_atomic(p, data):
    """Replace the contents of ``p`` with ``data`` via a temp file and rename.

    ``data`` is written in binary mode to a temporary file in the same
    directory as ``p``; the temp file then takes the place of ``p`` through
    ``os.replace``.  When ``p`` already exists its permission bits are copied
    onto the temp file first (best effort).  If writing or replacing fails,
    the temp file is removed and the original exception propagates.
    """
    d = os.path.dirname(p) or "."
    tmp = os.path.join(d, ".wssweep-%d-%s.tmp" % (os.getpid(), os.path.basename(p)))
    try:
        # Only the permission bits are meaningful to chmod().
        mode = _stat.S_IMODE(os.stat(p).st_mode)
    except OSError:
        mode = None
    try:
        with open(tmp, "wb") as fh:
            fh.write(data)
        if mode is not None:
            try:
                os.chmod(tmp, mode)
            except OSError:
                # Deliberately best effort: failing to copy the mode must not
                # prevent the content from being written.
                pass
        os.replace(tmp, p)
    except BaseException:
        # Never leave a stray temp file next to the target.
        try:
            os.unlink(tmp)
        except OSError:
            pass
        raise
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: wssweep
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Zero-config whitespace doctor: find (and --fix) trailing whitespace, mixed line endings, missing/extra final newlines, BOMs and mixed indentation. Zero dependencies.
|
|
5
|
+
Author: yyfjj
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/jjdoor/wssweep-py
|
|
8
|
+
Project-URL: Repository, https://github.com/jjdoor/wssweep-py
|
|
9
|
+
Project-URL: Issues, https://github.com/jjdoor/wssweep-py/issues
|
|
10
|
+
Keywords: whitespace,trailing-whitespace,line-endings,crlf,eol,lint,formatter,cli,ci
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Environment :: Console
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Topic :: Software Development :: Quality Assurance
|
|
18
|
+
Classifier: Topic :: Utilities
|
|
19
|
+
Requires-Python: >=3.8
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
License-File: LICENSE
|
|
22
|
+
Dynamic: license-file
|
|
23
|
+
|
|
24
|
+
# wssweep
|
|
25
|
+
|
|
26
|
+
**A zero-config whitespace doctor.** Run it on any repo and it instantly finds —
|
|
27
|
+
and with `--fix`, cleans — the whitespace problems that pollute diffs and break
|
|
28
|
+
across platforms: trailing whitespace, mixed CRLF/LF line endings, a missing
|
|
29
|
+
final newline, extra trailing blank lines, a UTF-8 BOM, lone CRs, and tabs mixed
|
|
30
|
+
with spaces in indentation.
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
pipx run wssweep
|
|
34
|
+
# config.yml (1)
|
|
35
|
+
# - mixed-eol mixed line endings (CRLF×3, LF×1)
|
|
36
|
+
#
|
|
37
|
+
# src/app.js (2)
|
|
38
|
+
# - missing-final-newline no newline at end of file
|
|
39
|
+
# 14: trailing-whitespace trailing whitespace
|
|
40
|
+
#
|
|
41
|
+
# ✖ 3 whitespace issues in 2 files (missing-final-newline=1, mixed-eol=1, trailing-whitespace=1)
|
|
42
|
+
|
|
43
|
+
pipx run wssweep --fix # clean them in place
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
No config file, no framework. Exits non-zero when it finds issues, so it drops
|
|
47
|
+
straight into CI. Pure standard library. Also on npm (`npx wssweep`) — the two
|
|
48
|
+
builds produce **byte-for-byte identical** output *and* identical fixes.
|
|
49
|
+
|
|
50
|
+
## Why another whitespace tool?
|
|
51
|
+
|
|
52
|
+
Because today this takes three or four tools wired together:
|
|
53
|
+
|
|
54
|
+
- **editorconfig-checker** reports, but needs you to author an `.editorconfig`
|
|
55
|
+
first, and it can't fix anything.
|
|
56
|
+
- **pre-commit**'s `trailing-whitespace` / `end-of-file-fixer` /
|
|
57
|
+
`mixed-line-ending` hooks *do* fix — but only inside the pre-commit framework,
|
|
58
|
+
and they're three separate hooks. Nobody runs them ad-hoc on a fresh checkout.
|
|
59
|
+
- **prettier** fixes whitespace only as a side effect of reformatting all your
|
|
60
|
+
code, is language-aware, and won't touch files it can't parse.
|
|
61
|
+
- **dos2unix** only does line endings.
|
|
62
|
+
|
|
63
|
+
`wssweep` is the one command — `pip`/`npx`, zero config — that reports *all*
|
|
64
|
+
seven whitespace smells at once with line numbers and optionally fixes them in
|
|
65
|
+
place, with a clean CI exit code, identical on Python and Node.
|
|
66
|
+
|
|
67
|
+
## What it checks
|
|
68
|
+
|
|
69
|
+
| check | what | `--fix` |
|
|
70
|
+
|---|---|---|
|
|
71
|
+
| `trailing-whitespace` | space/tab at end of a line | trims it |
|
|
72
|
+
| `mixed-eol` | a file containing **both** CRLF and LF | normalizes to LF |
|
|
73
|
+
| `lone-cr` | a bare CR (old-Mac line ending) | normalizes |
|
|
74
|
+
| `missing-final-newline` | non-empty file not ending in a newline | appends one |
|
|
75
|
+
| `trailing-blank-lines` | extra blank line(s) at end of file | collapses to one |
|
|
76
|
+
| `utf8-bom` | a leading UTF-8 BOM | strips it |
|
|
77
|
+
| `mixed-indentation` | tabs **and** spaces in one indent | report-only (needs your tab width) |
|
|
78
|
+
|
|
79
|
+
Opinionated, zero-config defaults: a consistently-CRLF file is **fine** (only
|
|
80
|
+
*mixed* endings are flagged), `.bat`/`.cmd` keep CRLF when fixed, and Markdown's
|
|
81
|
+
two-trailing-spaces hard line break is preserved (trailing-whitespace is skipped
|
|
82
|
+
in `.md`).
|
|
83
|
+
|
|
84
|
+
## Usage
|
|
85
|
+
|
|
86
|
+
```bash
|
|
87
|
+
wssweep # scan the current directory
|
|
88
|
+
wssweep src/ docs/ # scan specific paths
|
|
89
|
+
wssweep --fix # fix in place (atomic; only files that change)
|
|
90
|
+
wssweep --crlf --fix # normalize endings to CRLF instead of LF
|
|
91
|
+
wssweep --skip=mixed-indentation # turn off a check
|
|
92
|
+
wssweep --exclude='*.min.js' # skip paths by glob (repeatable)
|
|
93
|
+
wssweep --json # machine output (byte-identical both builds)
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
`.git`, `node_modules`, `dist`, `build`, `vendor`, `.venv` and friends are
|
|
97
|
+
skipped by default, as are binary files (detected by extension + a NUL-byte /
|
|
98
|
+
non-UTF-8 content check) and files over 5 MB. `--all` lifts the directory, extension and size skips; the content check always applies.
|
|
99
|
+
|
|
100
|
+
Exit codes: `0` clean · `1` issues found · `2` error. (`--fix` exits `0` once
|
|
101
|
+
everything fixable is fixed; a leftover `mixed-indentation` keeps it `1`.)
|
|
102
|
+
|
|
103
|
+
## How it works
|
|
104
|
+
|
|
105
|
+
It reads every file as **raw bytes** and scans a byte-faithful (latin-1) view, so
|
|
106
|
+
it never mangles encodings and the Python and Node builds agree to the byte: line
|
|
107
|
+
endings are classified from the bytes (never `splitlines`, which over-splits),
|
|
108
|
+
"whitespace" means exactly space and tab (never `\s`, which differs across
|
|
109
|
+
languages), and `--fix` writes raw bytes atomically (temp file + rename),
|
|
110
|
+
touching only files that actually change and preserving file modes. Fixing is
|
|
111
|
+
idempotent — run it twice, the second run does nothing.
|
|
112
|
+
|
|
113
|
+
## Install
|
|
114
|
+
|
|
115
|
+
```bash
|
|
116
|
+
pip install wssweep # or pipx run wssweep
|
|
117
|
+
npm i -g wssweep # Node build, identical behaviour
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
Python ≥ 3.8 or Node ≥ 18. No dependencies.
|
|
121
|
+
|
|
122
|
+
## License
|
|
123
|
+
|
|
124
|
+
MIT
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
src/wssweep/__init__.py
|
|
5
|
+
src/wssweep/__main__.py
|
|
6
|
+
src/wssweep/cli.py
|
|
7
|
+
src/wssweep/core.py
|
|
8
|
+
src/wssweep/walk.py
|
|
9
|
+
src/wssweep.egg-info/PKG-INFO
|
|
10
|
+
src/wssweep.egg-info/SOURCES.txt
|
|
11
|
+
src/wssweep.egg-info/dependency_links.txt
|
|
12
|
+
src/wssweep.egg-info/entry_points.txt
|
|
13
|
+
src/wssweep.egg-info/top_level.txt
|
|
14
|
+
tests/test_core.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
wssweep
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
from wssweep import core
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def checks(s, ext="txt"):
    """Return the ``check`` name of every issue ``core.analyze`` reports, in order."""
    names = []
    for issue in core.analyze(s, ext):
        names.append(issue["check"])
    return names
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def fix(s, ext="txt", opts=None):
    """Call ``core.fix`` with a default extension of ``txt`` and no options."""
    result = core.fix(s, ext, opts)
    return result
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def test_trailing_whitespace_per_line():
    """Lines 1 and 2 end in a space/tab and are each reported; line 3 is clean."""
    flagged = []
    for issue in core.analyze("foo \nbar\t\nbaz\n", "txt"):
        if issue["check"] == "trailing-whitespace":
            flagged.append(issue["line"])
    assert flagged == [1, 2]
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def test_trailing_ws_before_crlf():
    """A space sitting before a CRLF terminator is the only issue reported."""
    found = checks("x \r\n")
    assert found == ["trailing-whitespace"]
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def test_mixed_eol():
    """Only a file mixing CRLF and LF is flagged; consistent files are clean."""
    cases = (
        ("a\r\nb\nc\r\n", ["mixed-eol"]),
        ("a\r\nb\r\n", []),
        ("a\nb\n", []),
    )
    for text, expected in cases:
        assert checks(text) == expected
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def test_lone_cr():
    """Bare CRs are reported once as ``lone-cr``; CRs inside CRLF pairs are not."""
    with_bare_cr = checks("a\rb\rc\n")
    assert with_bare_cr == ["lone-cr"]
    crlf_only = checks("a\r\nb\r\n")
    assert crlf_only == []
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def test_missing_final_newline():
    """A non-empty file without a final newline is flagged; empty files are not."""
    cases = (
        ("x", ["missing-final-newline"]),
        ("", []),
        ("x\n", []),
    )
    for text, expected in cases:
        assert checks(text) == expected
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def test_trailing_blank_lines():
    """Extra blank lines at EOF are flagged, including whitespace-only ones."""
    assert checks("x\n\n\n") == ["trailing-blank-lines"]
    assert checks("x\n") == []
    # A whitespace-only last line is both a blank line and trailing whitespace.
    both = checks("x\n \n")
    both.sort()
    assert both == ["trailing-blank-lines", "trailing-whitespace"]
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def test_bom():
    """A leading UTF-8 BOM (in the latin-1 view) is reported and stripped by fix."""
    with_bom = "\xef\xbb\xbfhi\n"
    assert checks(with_bom) == ["utf8-bom"]
    assert fix(with_bom) == "hi\n"
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def test_mixed_indentation():
    """Tabs and spaces within one indent are flagged; a tab later in the line is not."""
    cases = (
        ("\t x\n", ["mixed-indentation"]),
        (" \tx\n", ["mixed-indentation"]),
        ("x = 1\t# aligned\n", []),
    )
    for text, expected in cases:
        assert checks(text, "py") == expected
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def test_markdown_skip_trailing_ws():
    """Trailing whitespace in ``.md`` content is neither reported nor removed."""
    text = "line one \nline two\n"
    assert checks(text, "md") == []
    assert fix(text, "md") == "line one \nline two\n"
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def test_fix_normalizes():
    """Fix trims trailing space, normalizes mixed endings to LF and collapses EOF blanks."""
    fixed = fix("a \r\nb\n\n\n")
    assert fixed == "a\nb\n"
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def test_fix_keeps_consistent_crlf():
    """A file that is CRLF throughout keeps its CRLF endings after fixing."""
    fixed = fix("a \r\nb\r\n")
    assert fixed == "a\r\nb\r\n"
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def test_fix_bat_forced_crlf():
    """Fixing LF content with the ``bat`` extension yields CRLF endings."""
    fixed = fix("a\nb\n", "bat")
    assert fixed == "a\r\nb\r\n"
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def test_fix_crlf_idempotent():
    """With ``crlf`` set, fixing yields CRLF output and a second pass changes nothing."""
    opts = {"crlf": True}
    once = fix("a \nb", "txt", opts)
    assert once == "a\r\nb\r\n"
    twice = fix(once, "txt", {"crlf": True})
    assert twice == once
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def test_fix_final_newline_and_empty():
    """Fix appends a missing final newline but leaves an empty file empty."""
    for text, expected in (("x", "x\n"), ("", "")):
        assert fix(text) == expected
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def test_fix_respects_skip():
    """A check named in ``opts["skip"]`` is not applied by fix."""
    spaced = "a \n"
    assert fix(spaced, "txt", {"skip": {"trailing-whitespace"}}) == "a \n"
    with_bom = "\xef\xbb\xbfhi\n"
    assert fix(with_bom, "txt", {"skip": {"utf8-bom"}}) == "\xef\xbb\xbfhi\n"
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def test_fix_idempotent_full():
    """Fixing already-fixed output (BOM, trailing space, mixed EOL, EOF blanks) is a no-op."""
    first_pass = fix("\xef\xbb\xbfa \r\nb\n\n\n")
    second_pass = fix(first_pass)
    assert second_pass == first_pass
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def test_json_and_summarize():
    """Sorted records come back as a.txt:1, a.txt:2, b.txt; summary and JSON agree."""
    unsorted = [
        {"file": "b.txt", "check": "utf8-bom", "line": None, "message": "m"},
        {"file": "a.txt", "check": "trailing-whitespace", "line": 2, "message": "m"},
        {"file": "a.txt", "check": "trailing-whitespace", "line": 1, "message": "m"},
    ]
    recs = core.sort_file_issues(unsorted)
    locations = [f"{r['file']}:{r['line']}" for r in recs]
    assert locations == ["a.txt:1", "a.txt:2", "b.txt:None"]

    sum_ = core.summarize(recs, 2)
    expected_summary = {
        "filesScanned": 2,
        "filesWithIssues": 2,
        "issueCount": 3,
        "byType": {"trailing-whitespace": 2, "utf8-bom": 1},
    }
    assert sum_ == expected_summary

    payload = core.to_json(recs, sum_)
    assert payload["version"] == 1
    first_file = payload["files"][0]
    assert first_file["path"] == "a.txt"
    assert len(first_file["issues"]) == 2
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def test_fix_mode_excludes_report_only():
    """In fix mode the report counts and labels only the fixable issue as fixed."""
    raw = [
        {"file": "a.py", "check": "mixed-indentation", "line": 2, "message": "mixed"},
        {"file": "a.py", "check": "trailing-whitespace", "line": 1, "message": "trailing whitespace"},
    ]
    recs = core.sort_file_issues(raw)
    summary = core.summarize(recs, 1)
    out = core.format_report(recs, summary, None, {"fixed": True})
    for expected in (
        "✓ fixed 1 issue in 1 file",
        "fixed trailing-whitespace",
        "(trailing-whitespace=1)",
    ):
        assert expected in out
    assert "fixed mixed-indentation" not in out
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def test_format_report():
    """The report says so when nothing is found and uses singular wording for one issue."""
    clean = core.format_report([], core.summarize([], 3), None, {})
    assert "no whitespace issues" in clean

    recs = [{"file": "a.txt", "check": "utf8-bom", "line": None, "message": "UTF-8 BOM"}]
    single = core.format_report(recs, core.summarize(recs, 1), None, {})
    assert "1 whitespace issue in 1 file" in single
|