tackbox 0.1.14__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tackbox/__init__.py +1 -0
- tackbox/bin/erclint-opengrep.exe +0 -0
- tackbox/bin/erclint.exe +0 -0
- tackbox/cache.py +351 -0
- tackbox/cli.py +349 -0
- tackbox/doctor.py +241 -0
- tackbox/engines.json +265 -0
- tackbox/engines.py +408 -0
- tackbox/rules/bin/tackbox-eslint.js +26 -0
- tackbox/rules/bin/tackbox-mdlint.js +36 -0
- tackbox/rules/eslint.config.preset.js +18 -0
- tackbox/rules/js/eslint-plugin.js +36 -0
- tackbox/rules/js/markdownlint-rules/no-non-ascii.js +27 -0
- tackbox/rules/js/rules/_shared.js +151 -0
- tackbox/rules/js/rules/no-console-error.js +20 -0
- tackbox/rules/js/rules/no-secret-in-report.js +56 -0
- tackbox/rules/js/rules/no-swallow-catch.js +24 -0
- tackbox/rules/js/rules/no-swallow-promise-catch.js +37 -0
- tackbox/rules/js/rules/no-throw-and-report.js +23 -0
- tackbox/rules/js/rules/valid-dedup-key.js +41 -0
- tackbox/rules/js/rules/valid-error-report.js +87 -0
- tackbox/rules/js/rules/valid-throw-error.js +39 -0
- tackbox/rules/opengrep/erc006-dedupkey.yaml +47 -0
- tackbox/rules/opengrep/erc006-fingerprint-secrets.yaml +45 -0
- tackbox/rules/opengrep/erc006-fingerprint-user-input-go.yaml +21 -0
- tackbox/source_set.py +178 -0
- tackbox/third_party/licenses/tackbox.LICENSE.txt +21 -0
- tackbox-0.1.14.dist-info/METADATA +8 -0
- tackbox-0.1.14.dist-info/RECORD +32 -0
- tackbox-0.1.14.dist-info/WHEEL +5 -0
- tackbox-0.1.14.dist-info/entry_points.txt +2 -0
- tackbox-0.1.14.dist-info/top_level.txt +1 -0
tackbox/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.1.14"
|
|
Binary file
|
tackbox/bin/erclint.exe
ADDED
|
Binary file
|
tackbox/cache.py
ADDED
|
@@ -0,0 +1,351 @@
|
|
|
1
|
+
"""(unit, engine) cache: layout, digests, marker ops, GC.
|
|
2
|
+
|
|
3
|
+
Layout: `<TACKBOX_CACHE_HOME>/v1/<engines-hash>/<unit-digest>.<engine-id>`.
|
|
4
|
+
Marker is an empty file. `TACKBOX_CACHE_HOME` defaults to `~/.cache/tackbox`;
|
|
5
|
+
tests point it at a tmp dir.
|
|
6
|
+
|
|
7
|
+
Semantics per plan:
|
|
8
|
+
- Marker written only on success. Failures are not cached.
|
|
9
|
+
- Cache hit re-touches the marker: mtime is the LRU signal.
|
|
10
|
+
- Corrupt / unreadable marker -> treated as miss (rerun), never fatal.
|
|
11
|
+
- `mark_clean` is best-effort; cache is an optimisation, never a hard error.
|
|
12
|
+
|
|
13
|
+
Unit granularity:
|
|
14
|
+
- eslint / mdlint / opengrep -> unit = file, digest = sha256(content).
|
|
15
|
+
- erclint -> unit = Go package, digest = sha256(import path + own .go files +
|
|
16
|
+
transitive in-module deps' .go files + go.mod + go.sum). A signature change
|
|
17
|
+
in package B invalidates every in-module package that depends on B.
|
|
18
|
+
|
|
19
|
+
engines-hash:
|
|
20
|
+
- Dev mode digests the engine payload sources (go/, js/, bin/, the eslint
|
|
21
|
+
preset, npm manifest and lockfile), so editing any rule invalidates prior
|
|
22
|
+
markers and stale clean-results can never mask new findings. The
|
|
23
|
+
orchestrator under py/ is deliberately outside the payload. Hermetic
|
|
24
|
+
wheels (step 6) replace this with the bundled-payload digest.
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
from __future__ import annotations
|
|
28
|
+
|
|
29
|
+
import hashlib
|
|
30
|
+
import json
|
|
31
|
+
import os
|
|
32
|
+
import shutil
|
|
33
|
+
import subprocess
|
|
34
|
+
from dataclasses import dataclass
|
|
35
|
+
from pathlib import Path
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
# Environment variable that overrides the cache base directory.
CACHE_ROOT_ENV = "TACKBOX_CACHE_HOME"
# On-disk layout version; becomes the first path component under the base.
CACHE_VERSION = "v1"
# Default marker-count limit handed to `gc_soft_cap` by the CLI.
SOFT_CAP = 20000


def default_cache_root() -> Path:
    """Return the versioned cache root that holds the `<engines-hash>/` dirs.

    The base directory is the value of `TACKBOX_CACHE_HOME` when that
    variable is set to a non-empty string, otherwise `~/.cache/tackbox`.
    `CACHE_VERSION` is appended in both cases.
    """
    configured = os.environ.get(CACHE_ROOT_ENV)
    base = Path(configured) if configured else Path.home() / ".cache" / "tackbox"
    return base / CACHE_VERSION
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@dataclass(frozen=True)
class CacheKey:
    """Immutable identity of one cached (unit, engine) result."""

    # Name of the per-engine-set subdirectory under the cache root.
    engines_hash: str
    # Digest of the linted unit; forms the marker file's stem.
    unit_digest: str
    # Engine identifier; forms the marker file's suffix.
    engine_id: str

    def marker(self, root: Path) -> Path:
        """Return `<root>/<engines_hash>/<unit_digest>.<engine_id>`."""
        filename = f"{self.unit_digest}.{self.engine_id}"
        return root.joinpath(self.engines_hash, filename)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
# Engine payload in dev mode: everything that shapes findings, nothing else.
_DEV_PAYLOAD = (
    "go",
    "js",
    "bin",
    "eslint.config.preset.js",
    "package.json",
    "package-lock.json",
)


def engines_hash_dev(tackbox_root: Path) -> str:
    """Return the hex SHA-256 over the dev-mode engine payload.

    Each `_DEV_PAYLOAD` entry under `tackbox_root` contributes either
    itself (when it is a file) or every file below it in sorted order
    (when it is a directory). Entries that do not exist and anything
    inside a `__pycache__` directory are skipped.
    """
    digest = hashlib.sha256()
    digest.update(b"dev-payload-v1\n")
    for entry in _DEV_PAYLOAD:
        candidate = tackbox_root / entry
        if candidate.is_file():
            _hash_payload_file(digest, entry, candidate)
            continue
        if not candidate.is_dir():
            continue
        for member in sorted(candidate.rglob("*")):
            if "__pycache__" in member.parts or not member.is_file():
                continue
            rel = member.relative_to(tackbox_root).as_posix()
            _hash_payload_file(digest, rel, member)
    return digest.hexdigest()
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _hash_payload_file(h, rel: str, path: Path) -> None:
    """Feed one `<rel>\\t<sha256 of path>\\n` record into hash object `h`."""
    # The name is fed before the file is read, so the update order matches
    # the record layout even if reading the file raises.
    h.update(rel.encode() + b"\t")
    h.update(sha256_file(path).encode() + b"\n")
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def is_cached(key: CacheKey, root: Path) -> bool:
    """Report whether the marker for `key` exists, refreshing its mtime on a hit.

    Any OSError while probing or touching the marker is treated as a miss.
    """
    marker_path = key.marker(root)
    try:
        hit = marker_path.is_file()
        if hit:
            # Re-touch so the mtime-ordered GC sees this marker as recent.
            marker_path.touch()
    except OSError:
        return False
    return hit
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def mark_clean(key: CacheKey, root: Path) -> None:
    """Create the empty marker file for `key`, creating parent dirs as needed.

    Best-effort: an OSError from mkdir or touch is deliberately ignored so
    a cache write can never fail the run.
    """
    target = key.marker(root)
    try:
        target.parent.mkdir(parents=True, exist_ok=True)
        target.touch()
    except OSError:
        # The cache is only an optimisation; give up quietly.
        return
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def sha256_file(path: Path) -> str:
    """Return the hex SHA-256 of the file at `path`, read in 64 KiB chunks."""
    digest = hashlib.sha256()
    with path.open("rb") as stream:
        while True:
            block = stream.read(65536)
            if not block:
                break
            digest.update(block)
    return digest.hexdigest()
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def sha256_tree(root: Path) -> str:
    """Return a deterministic hex SHA-256 of the directory tree at `root`.

    Every regular file below `root` contributes the record
    `<posix relative path>\\0<sha256 of content>\\0`, in sorted order.

    Shared by the wheel builder (stamping engines.json) and doctor
    (verifying it) - one implementation so the two can never drift.
    """
    entries = [(f.relative_to(root), f) for f in root.rglob("*") if f.is_file()]
    # Order by the tuple of relative path components, compared as plain
    # strings. Sorting Path objects directly applies the platform's
    # case-folding (Windows paths compare case-insensitively), which can
    # order mixed-case siblings differently on Windows and POSIX and so
    # change the digest of an identical tree. Component-wise string order
    # matches what Path ordering already gives on POSIX.
    entries.sort(key=lambda pair: pair[0].parts)
    h = hashlib.sha256()
    for rel, f in entries:
        h.update(rel.as_posix().encode())
        h.update(b"\0")
        h.update(sha256_file(f).encode())
        h.update(b"\0")
    return h.hexdigest()
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def gc_stale_engines(current: str, root: Path) -> None:
    """Remove every subdirectory of `root` whose name is not `current`.

    Does nothing when `root` is not a directory. Plain files under `root`
    are left alone, and removal errors are ignored.
    """
    if root.is_dir():
        stale = (d for d in root.iterdir() if d.is_dir() and d.name != current)
        for directory in stale:
            shutil.rmtree(directory, ignore_errors=True)
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def gc_soft_cap(engines_hash: str, cap: int, root: Path) -> None:
    """Delete the oldest markers in `<root>/<engines_hash>` beyond `cap` files.

    Markers are ordered by mtime, oldest first, and removed from the front
    until at most `cap` remain. A marker that disappears while this runs
    (for example under a concurrent run) is ignored; nothing here raises
    for a missing or undeletable file.
    """
    bucket = root / engines_hash
    if not bucket.is_dir():
        return
    candidates = [entry for entry in bucket.iterdir() if entry.is_file()]
    excess = len(candidates) - cap
    if excess <= 0:
        return

    def age_key(marker: Path) -> float:
        # A marker that vanished between listing and stat sorts as the
        # oldest, so the unlink below deals with it instead of raising.
        try:
            return marker.stat().st_mtime
        except OSError:
            return 0.0

    for victim in sorted(candidates, key=age_key)[:excess]:
        try:
            victim.unlink()
        except OSError:
            continue
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
# -- erclint package digest -----------------------------------------------
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def erclint_package_digests(
    repo_root: Path, package_dirs: list[str]
) -> dict[str, str]:
    """Compute {package_dir: unit_digest} for erclint units.

    Runs `go list -deps -json` once for all requested packages, filters to
    same-module deps, then hashes each package's own .go files together with
    the .go files of its transitive in-module deps plus go.mod / go.sum.

    Args:
        repo_root: directory `go list` runs in; go.mod / go.sum are read
            from here.
        package_dirs: package directories relative to `repo_root`.

    Returns:
        A map keyed by the entries of `package_dirs` exactly as passed.
        Entries that do not resolve to an in-module package are dropped;
        the caller decides what to do (usually: skip caching for them).
        The map is empty when `go list` cannot be run, exits non-zero, or
        produces output that cannot be decoded - the cache is only an
        optimisation, so that must not abort the lint run.
    """
    if not package_dirs:
        return {}
    try:
        completed = subprocess.run(
            ["go", "list", "-deps", "-json", *(f"./{p}" for p in package_dirs)],
            cwd=repo_root,
            capture_output=True,
            text=True,
            # `go list` writes UTF-8; without this the locale codec is used,
            # which garbles or rejects non-ASCII paths on Windows.
            encoding="utf-8",
            check=True,
        )
        pkgs = list(_iter_json_objects(completed.stdout))
    except (OSError, subprocess.CalledProcessError, ValueError):
        # No digests means "lint always, cache never" for every package.
        return {}

    our_module = _module_path_from_pkgs(pkgs)
    if our_module is None:
        return {}

    in_module: dict[str, dict] = {}
    for p in pkgs:
        if p.get("Standard"):
            continue
        if (p.get("Module") or {}).get("Path") != our_module:
            continue
        pkg_path = Path(p["Dir"])
        in_module[p["ImportPath"]] = {
            "dir": pkg_path,
            "files": [pkg_path / name for name in p.get("GoFiles") or []],
            # Deps is the transitive closure per `go list -json`; it is
            # narrowed to in-module packages once all of them are known.
            "deps": set(p.get("Deps") or []),
        }
    for info in in_module.values():
        info["deps"] = {d for d in info["deps"] if d in in_module}

    file_digest: dict[Path, str] = {}
    for info in in_module.values():
        for f in info["files"]:
            if f not in file_digest and f.is_file():
                file_digest[f] = sha256_file(f)

    go_mod_digest = _optional_file_digest(repo_root / "go.mod")
    go_sum_digest = _optional_file_digest(repo_root / "go.sum")

    # Key directories by their POSIX spelling. `str()` of a relative path
    # uses backslashes on Windows and would not match a "pkg/sub" style
    # entry (assumes package_dirs arrive slash-separated - TODO confirm
    # against the caller).
    dir_to_import: dict[str, str] = {}
    repo_resolved = repo_root.resolve()
    for import_path, info in in_module.items():
        try:
            rel = info["dir"].resolve().relative_to(repo_resolved)
        except ValueError:
            continue
        dir_to_import[rel.as_posix()] = import_path

    result: dict[str, str] = {}
    for pkg_dir in package_dirs:
        # Normalise the request the same way ("./pkg", "pkg/" -> "pkg").
        import_path = dir_to_import.get(Path(pkg_dir).as_posix())
        if import_path is None:
            continue
        result[pkg_dir] = _package_digest(
            import_path, in_module, file_digest, go_mod_digest, go_sum_digest
        )
    return result
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
def erclint_import_paths(
    repo_root: Path, package_dirs: list[str]
) -> dict[str, str]:
    """Return {package_dir: import_path}. Used to interpret erclint findings.

    Args:
        repo_root: directory `go list` runs in.
        package_dirs: package directories relative to `repo_root`.

    Returns:
        A map keyed by the entries of `package_dirs` exactly as passed;
        directories `go list` does not report are absent. The map is
        empty when `go list` cannot be run, exits non-zero, or produces
        output that cannot be decoded.
    """
    if not package_dirs:
        return {}
    try:
        completed = subprocess.run(
            ["go", "list", "-json", *(f"./{p}" for p in package_dirs)],
            cwd=repo_root,
            capture_output=True,
            text=True,
            # `go list` writes UTF-8; do not depend on the locale codec.
            encoding="utf-8",
            check=True,
        )
        pkgs = list(_iter_json_objects(completed.stdout))
    except (OSError, subprocess.CalledProcessError, ValueError):
        return {}

    # Index requests by POSIX spelling: one dict lookup per package instead
    # of a list scan, and no dependence on the platform separator.
    # `str()` of a relative path uses backslashes on Windows and would
    # not match a "pkg/sub" style entry (assumes package_dirs arrive
    # slash-separated - TODO confirm against the caller).
    wanted: dict[str, list[str]] = {}
    for pkg_dir in package_dirs:
        wanted.setdefault(Path(pkg_dir).as_posix(), []).append(pkg_dir)

    repo_resolved = repo_root.resolve()
    result: dict[str, str] = {}
    for p in pkgs:
        try:
            rel = Path(p["Dir"]).resolve().relative_to(repo_resolved)
        except ValueError:
            continue
        for pkg_dir in wanted.get(rel.as_posix(), ()):
            result[pkg_dir] = p["ImportPath"]
    return result
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
def _package_digest(
    import_path: str,
    in_module: dict[str, dict],
    file_digest: dict[Path, str],
    go_mod_digest: str,
    go_sum_digest: str,
) -> str:
    """Return the hex SHA-256 identifying one Go package as a cache unit.

    The digest covers, in order: the import path, the package's own files,
    each in-module dependency (sorted by import path) with its files, and
    the go.mod and go.sum digests.
    """
    own = in_module[import_path]
    digest = hashlib.sha256()
    digest.update(import_path.encode() + b"\n---self---\n")
    _hash_files(digest, own["files"], file_digest)
    digest.update(b"---deps---\n")
    for dep_path in sorted(own["deps"]):
        digest.update(dep_path.encode() + b"\n")
        _hash_files(digest, in_module[dep_path]["files"], file_digest)
    digest.update(b"---go.mod---\n" + go_mod_digest.encode())
    digest.update(b"\n---go.sum---\n" + go_sum_digest.encode())
    return digest.hexdigest()
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
def _hash_files(h, files, file_digest: dict[Path, str]) -> None:
|
|
312
|
+
for f in sorted(files):
|
|
313
|
+
digest = file_digest.get(f)
|
|
314
|
+
if digest is None:
|
|
315
|
+
continue
|
|
316
|
+
h.update(f.name.encode())
|
|
317
|
+
h.update(b"\t")
|
|
318
|
+
h.update(digest.encode())
|
|
319
|
+
h.update(b"\n")
|
|
320
|
+
|
|
321
|
+
|
|
322
|
+
def _optional_file_digest(path: Path) -> str:
    """Return the hex SHA-256 of `path`, or "" when it is not a regular file."""
    return sha256_file(path) if path.is_file() else ""
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
def _module_path_from_pkgs(pkgs: list[dict]) -> str | None:
|
|
329
|
+
for p in pkgs:
|
|
330
|
+
if p.get("Standard"):
|
|
331
|
+
continue
|
|
332
|
+
mod = p.get("Module") or {}
|
|
333
|
+
path = mod.get("Path")
|
|
334
|
+
if path:
|
|
335
|
+
return path
|
|
336
|
+
return None
|
|
337
|
+
|
|
338
|
+
|
|
339
|
+
def _iter_json_objects(text: str):
|
|
340
|
+
"""Iterate over concatenated top-level JSON objects in `text`."""
|
|
341
|
+
decoder = json.JSONDecoder()
|
|
342
|
+
idx = 0
|
|
343
|
+
n = len(text)
|
|
344
|
+
while idx < n:
|
|
345
|
+
while idx < n and text[idx].isspace():
|
|
346
|
+
idx += 1
|
|
347
|
+
if idx >= n:
|
|
348
|
+
break
|
|
349
|
+
obj, end = decoder.raw_decode(text, idx)
|
|
350
|
+
yield obj
|
|
351
|
+
idx = end
|
tackbox/cli.py
ADDED
|
@@ -0,0 +1,349 @@
|
|
|
1
|
+
"""tackbox lint / doctor CLI entry point."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
import subprocess
|
|
7
|
+
import sys
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
from . import __version__, cache, doctor
|
|
11
|
+
from .engines import (
|
|
12
|
+
EngineResult,
|
|
13
|
+
EngineSpec,
|
|
14
|
+
active_engines,
|
|
15
|
+
dispatch,
|
|
16
|
+
engines_hash_hermetic,
|
|
17
|
+
is_hermetic,
|
|
18
|
+
parse_erclint_findings,
|
|
19
|
+
resolve_dev_versions,
|
|
20
|
+
resolve_hermetic_versions,
|
|
21
|
+
run_engines,
|
|
22
|
+
)
|
|
23
|
+
from .source_set import (
|
|
24
|
+
PathspecMagicError,
|
|
25
|
+
filter_source_set,
|
|
26
|
+
parse_git_diff_names,
|
|
27
|
+
parse_ls_files_stage,
|
|
28
|
+
parse_ls_files_untracked,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class ChangedScopeError(ValueError):
    """Signal that a git command needed for --changed / --since failed."""
|
|
34
|
+
|
|
35
|
+
# Order in which `name=version` pairs are printed in the startup banner.
_BANNER_ORDER = ("erclint", "opengrep", "node", "eslint", "markdownlint")
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def main(argv: list[str] | None = None) -> int:
    """Run the tackbox CLI and return the process exit code.

    Args:
        argv: argument list without the program name; `sys.argv[1:]` is
            used when None.

    Returns:
        The aggregate engine exit code for `lint`, the doctor result for
        `doctor`, or 2 when the command is unknown or the run could not
        be set up (bad pathspec, failed --changed / --since lookup, or a
        failed helper command such as `git` outside a work tree).
    """
    args = _parse_argv(sys.argv[1:] if argv is None else argv)
    if args.command == "lint":
        try:
            return _run_lint(
                args.path,
                no_cache=args.no_cache,
                changed=args.changed,
                since=args.since,
            )
        except (PathspecMagicError, ChangedScopeError) as e:
            print(f"tackbox: {e}", file=sys.stderr)
            return 2
        except subprocess.CalledProcessError as e:
            # A helper run with check=True failed, e.g. `git rev-parse`
            # outside a repository. Report it like the other setup errors
            # instead of letting a traceback escape.
            stderr = e.stderr
            if isinstance(stderr, bytes):
                stderr = stderr.decode("utf-8", errors="replace")
            detail = (stderr or "").strip() or f"exit status {e.returncode}"
            cmd = e.cmd if isinstance(e.cmd, str) else " ".join(map(str, e.cmd))
            print(f"tackbox: {cmd}: {detail}", file=sys.stderr)
            return 2
    if args.command == "doctor":
        _print_banner(_tackbox_root())
        return doctor.run(sys.stdout)
    print(f"tackbox: unknown command {args.command!r}", file=sys.stderr)
    return 2
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _parse_argv(argv: list[str]) -> argparse.Namespace:
|
|
59
|
+
parser = argparse.ArgumentParser(prog="tackbox")
|
|
60
|
+
sub = parser.add_subparsers(dest="command", required=True)
|
|
61
|
+
lint = sub.add_parser("lint", help="lint the source set")
|
|
62
|
+
lint.add_argument("path", nargs="?", default=".", help="scope path (default: .)")
|
|
63
|
+
lint.add_argument(
|
|
64
|
+
"--no-cache",
|
|
65
|
+
action="store_true",
|
|
66
|
+
help="ignore and do not write the (unit, engine) cache",
|
|
67
|
+
)
|
|
68
|
+
lint.add_argument(
|
|
69
|
+
"--changed",
|
|
70
|
+
action="store_true",
|
|
71
|
+
help="restrict to dirty tree (staged + unstaged + untracked)",
|
|
72
|
+
)
|
|
73
|
+
lint.add_argument(
|
|
74
|
+
"--since",
|
|
75
|
+
metavar="<ref>",
|
|
76
|
+
default=None,
|
|
77
|
+
help="restrict to three-dot diff <ref>...HEAD unioned with dirty tree",
|
|
78
|
+
)
|
|
79
|
+
sub.add_parser("doctor", help="verify the hermetic install is functional")
|
|
80
|
+
return parser.parse_args(argv)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def _tackbox_root() -> Path:
|
|
84
|
+
return Path(__file__).resolve().parents[2]
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _run_lint(scope: str, no_cache: bool, changed: bool, since: str | None) -> int:
    """Lint the source set under `scope` and return the aggregate exit code.

    Args:
        scope: scope path handed to the source-set filter.
        no_cache: when true, neither read nor write the (unit, engine) cache.
        changed: restrict the source set to the changed-file scope.
        since: optional ref; when given, the changed scope is also computed.

    Returns:
        2 when the scope matches no files, 0 when no engine has work,
        otherwise the value of `_aggregate_exit` over the engine results.
    """

    def _emit(stream, text: str) -> None:
        # Write captured engine output, newline-terminated; skip empty text.
        if not text:
            return
        stream.write(text)
        if not text.endswith("\n"):
            stream.write("\n")

    repo_root = _find_repo_root()
    tackbox_root = _tackbox_root()

    changed_scope: set[str] | None = None
    if changed or since is not None:
        changed_scope = _compute_changed_scope(repo_root, since)

    files, warnings = _collect_source_set(repo_root, scope, changed_scope)
    for warning in warnings:
        print(f"tackbox: warning: {warning.reason}: {warning.path}", file=sys.stderr)

    if not files:
        print(
            f"tackbox: scope {scope!r} matched no files in the source set",
            file=sys.stderr,
        )
        return 2

    _print_banner(tackbox_root)

    plan = dispatch(files, active_engines())
    if not plan:
        return 0

    # Self-lint: tackbox lints itself. The cache is disabled so tackbox never
    # caches its own bugs (plan: "so that tackbox does not self-cache").
    self_lint = tackbox_root.resolve() == repo_root.resolve()

    if no_cache or self_lint:
        results = run_engines(plan, repo_root, tackbox_root)
    else:
        cache_root = cache.default_cache_root()
        if is_hermetic():
            engines_hash = engines_hash_hermetic()
        else:
            engines_hash = cache.engines_hash_dev(tackbox_root)
        cache.gc_stale_engines(engines_hash, cache_root)

        filtered_plan, pending = _apply_cache(plan, repo_root, engines_hash, cache_root)
        results = run_engines(filtered_plan, repo_root, tackbox_root)
        _mark_clean_units(results, pending, engines_hash, cache_root)
        cache.gc_soft_cap(engines_hash, cache.SOFT_CAP, cache_root)

    for r in results:
        sys.stdout.write(f"== {r.engine_id} ==\n")
        _emit(sys.stdout, r.stdout)
        _emit(sys.stderr, r.stderr)

    return _aggregate_exit(results)
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
# -- Cache wiring ---------------------------------------------------------
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def _apply_cache(
    plan: list[tuple[EngineSpec, list[str]]],
    repo_root: Path,
    engines_hash: str,
    cache_root: Path,
) -> tuple[list[tuple[EngineSpec, list[str]]], dict[str, dict]]:
    """Drop already-cached units from each engine's argument list.

    Returns:
        - filtered_plan: engines that still have uncached args.
        - pending[engine_id] = {
              "arg_digest": [(arg, digest), ...],  # uncached args passed to engine
              "arg_ip": {arg: import_path, ...},   # erclint-only mapping
          }
          Used after the run to turn engine output into per-unit success
          and write markers for the clean units.
    """
    filtered_plan: list[tuple[EngineSpec, list[str]]] = []
    pending: dict[str, dict] = {}
    for engine, args in plan:
        arg_digest, extras = _digests_for_engine(engine, args, repo_root)
        # A unit without a digest can never be a hit; it always runs.
        misses = [
            (arg, digest)
            for arg, digest in arg_digest
            if digest is None
            or not cache.is_cached(
                cache.CacheKey(engines_hash, digest, engine.id), cache_root
            )
        ]
        pending[engine.id] = {"arg_digest": misses, **extras}
        if misses:
            filtered_plan.append((engine, [arg for arg, _ in misses]))
    return filtered_plan, pending
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def _digests_for_engine(
    engine: EngineSpec, args: list[str], repo_root: Path
) -> tuple[list[tuple[str, str | None]], dict]:
    """Pair every engine argument with its cache digest.

    Returns:
        `(arg_digest, extras)`. `arg_digest` lists `(arg, digest)` in the
        order of `args`; a digest of None means "lint always, cache
        never". `extras` is `{"arg_ip": {arg: import_path}}` for erclint
        and empty for every other engine.
    """
    if engine.id == "erclint":
        digest_map = cache.erclint_package_digests(repo_root, args)
        ip_map = cache.erclint_import_paths(repo_root, args)
        # digest None = lint always, cache never; dropping the arg instead
        # would silently skip linting the package.
        return [(a, digest_map.get(a)) for a in args], {"arg_ip": ip_map}

    arg_digest: list[tuple[str, str | None]] = []
    for a in args:
        try:
            digest = cache.sha256_file(repo_root / a)
        except OSError:
            # An unreadable file must not abort the run from the cache
            # layer: hand it to the engine uncached.
            digest = None
        arg_digest.append((a, digest))
    return arg_digest, {}
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def _mark_clean_units(
    results: list[EngineResult],
    pending: dict[str, dict],
    engines_hash: str,
    cache_root: Path,
) -> None:
    """Write a cache marker for every unit that an engine run found clean.

    Results without a (non-empty) `pending` entry are skipped, as are
    clean args whose digest is None.
    """
    for result in results:
        info = pending.get(result.engine_id)
        if not info:
            continue
        clean = _clean_args(result, info)
        digests = dict(info["arg_digest"])
        for arg in clean:
            digest = digests.get(arg)
            if digest is not None:
                key = cache.CacheKey(engines_hash, digest, result.engine_id)
                cache.mark_clean(key, cache_root)
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def _clean_args(r: EngineResult, info: dict) -> list[str]:
|
|
217
|
+
args = [a for a, _ in info["arg_digest"]]
|
|
218
|
+
if r.engine_id == "erclint":
|
|
219
|
+
try:
|
|
220
|
+
findings = parse_erclint_findings(r.stdout)
|
|
221
|
+
except ValueError:
|
|
222
|
+
return []
|
|
223
|
+
dirty_ips = {f.get("pkg") for f in findings}
|
|
224
|
+
ip_map = info.get("arg_ip", {})
|
|
225
|
+
# Unknown import path -> cannot attribute findings -> never clean.
|
|
226
|
+
return [
|
|
227
|
+
a for a in args
|
|
228
|
+
if ip_map.get(a) is not None and ip_map[a] not in dirty_ips
|
|
229
|
+
]
|
|
230
|
+
if r.exit_code == 0:
|
|
231
|
+
return args
|
|
232
|
+
return []
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def _aggregate_exit(results: list[EngineResult]) -> int:
|
|
236
|
+
"""Aggregate engine exit codes; promote erclint findings to nonzero.
|
|
237
|
+
|
|
238
|
+
erclint in `-json` mode returns exit 0 even when findings exist -
|
|
239
|
+
handover #2 pinned this contract. tackbox is the layer that translates
|
|
240
|
+
findings into a failing aggregate exit.
|
|
241
|
+
"""
|
|
242
|
+
max_code = 0
|
|
243
|
+
for r in results:
|
|
244
|
+
code = r.exit_code
|
|
245
|
+
if code == 0 and r.engine_id == "erclint" and _erclint_has_findings(r.stdout):
|
|
246
|
+
code = 1
|
|
247
|
+
if code > max_code:
|
|
248
|
+
max_code = code
|
|
249
|
+
return max_code
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
def _erclint_has_findings(stdout: str) -> bool:
    """Report whether erclint output holds findings; unparsable output counts."""
    try:
        findings = parse_erclint_findings(stdout)
    except ValueError:
        # Analyzer-load errors surface as a failing aggregate.
        return True
    return bool(findings)
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
def _collect_source_set(
    repo_root: Path, scope: str, changed_scope: set[str] | None = None
):
    """List tracked and untracked files with git and filter them to `scope`.

    Runs `git ls-files -s -z` and
    `git ls-files --others --exclude-standard -z` in `repo_root`, then
    returns whatever `filter_source_set` returns for the parsed listings.
    A failing git command raises `subprocess.CalledProcessError`.
    """

    def _git_ls_files(*flags: str) -> bytes:
        # NUL-delimited listing, returned as raw bytes.
        proc = subprocess.run(
            ["git", "ls-files", *flags, "-z"],
            cwd=repo_root,
            capture_output=True,
            check=True,
        )
        return proc.stdout

    def _exists(rel_path) -> bool:
        return (repo_root / rel_path).exists()

    def _is_symlink(rel_path) -> bool:
        return (repo_root / rel_path).is_symlink()

    stage_bytes = _git_ls_files("-s")
    untracked_bytes = _git_ls_files("--others", "--exclude-standard")
    return filter_source_set(
        parse_ls_files_stage(stage_bytes),
        parse_ls_files_untracked(untracked_bytes),
        scope,
        exists=_exists,
        is_symlink=_is_symlink,
        changed_scope=changed_scope,
    )
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
def _compute_changed_scope(repo_root: Path, since: str | None) -> set[str]:
    """Return the dirty tree, unioned with the three-dot diff against `since`.

    Dirty tree = paths reported by `git diff HEAD` (index or worktree
    differs from HEAD) plus untracked files. When `since` is given, the
    paths of `git diff <since>...HEAD` are added: what changed on this
    branch since its merge-base with `since`, the PR-style question. A
    two-dot diff would also report changes made on `since` after the fork.

    Raises:
        ChangedScopeError: when either `git diff` exits non-zero.
    """

    def _diff_names(revision: str):
        return subprocess.run(
            ["git", "diff", "--name-only", "-z", revision],
            cwd=repo_root,
            capture_output=True,
        )

    def _stderr_text(proc) -> str:
        return proc.stderr.decode("utf-8", errors="replace").strip()

    dirty = _diff_names("HEAD")
    if dirty.returncode != 0:
        # Fresh repo without any commits: HEAD does not resolve. Raise a
        # tackbox error so onboarding sees a message, not a traceback.
        err = _stderr_text(dirty)
        raise ChangedScopeError(
            f"--changed / --since requires at least one commit ({err})"
        )
    scope: set[str] = set(parse_git_diff_names(dirty.stdout))

    untracked = subprocess.run(
        ["git", "ls-files", "--others", "--exclude-standard", "-z"],
        cwd=repo_root,
        capture_output=True,
        check=True,
    ).stdout
    scope.update(parse_ls_files_untracked(untracked))

    if since is None:
        return scope

    branch = _diff_names(f"{since}...HEAD")
    if branch.returncode != 0:
        err = _stderr_text(branch)
        raise ChangedScopeError(f"--since={since}: {err or 'git diff failed'}")
    scope.update(parse_git_diff_names(branch.stdout))
    return scope
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
def _find_repo_root() -> Path:
    """Return the path printed by `git rev-parse --show-toplevel`.

    Runs in the current working directory; a failing git command raises
    `subprocess.CalledProcessError`.
    """
    toplevel = subprocess.run(
        ["git", "rev-parse", "--show-toplevel"],
        capture_output=True,
        check=True,
    ).stdout
    return Path(toplevel.decode().strip())
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
def _print_banner(tackbox_root: Path) -> None:
    """Print the one-line version banner to stderr.

    The line holds the tackbox version, an engines identifier
    (`sha256:<hash>` in hermetic mode, `dev` otherwise) and one
    `name=version` pair per `_BANNER_ORDER` entry.
    """
    if is_hermetic():
        versions = resolve_hermetic_versions()
        engines_id = f"sha256:{engines_hash_hermetic()}"
    else:
        versions = resolve_dev_versions(tackbox_root)
        engines_id = "dev"
    fields = [f"{name}={versions[name]}" for name in _BANNER_ORDER]
    parts = " ".join(fields)
    print(f"tackbox {__version__} engines={engines_id} {parts}", file=sys.stderr)
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
# Script entry point: exit with the CLI's return code.
if __name__ == "__main__":
    raise SystemExit(main())
|