tackbox 0.1.14__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tackbox/__init__.py ADDED
@@ -0,0 +1 @@
1
# Package version string; the CLI imports it and prints it in its banner.
__version__ = "0.1.14"
Binary file
Binary file
tackbox/cache.py ADDED
@@ -0,0 +1,351 @@
1
+ """(unit, engine) cache: layout, digests, marker ops, GC.
2
+
3
+ Layout: `<TACKBOX_CACHE_HOME>/v1/<engines-hash>/<unit-digest>.<engine-id>`.
4
+ Marker is an empty file. `TACKBOX_CACHE_HOME` defaults to `~/.cache/tackbox`;
5
+ tests point it at a tmp dir.
6
+
7
+ Semantics per plan:
8
+ - Marker written only on success. Failures are not cached.
9
+ - Cache hit re-touches the marker: mtime is the LRU signal.
10
+ - Corrupt / unreadable marker -> treated as miss (rerun), never fatal.
11
+ - `mark_clean` is best-effort; cache is an optimisation, never a hard error.
12
+
13
+ Unit granularity:
14
+ - eslint / mdlint / opengrep -> unit = file, digest = sha256(content).
15
+ - erclint -> unit = Go package, digest = sha256(import path + own .go files +
16
+ transitive in-module deps' .go files + go.mod + go.sum). A signature change
17
+ in package B invalidates every in-module package that depends on B.
18
+
19
+ engines-hash:
20
+ - Dev mode digests the engine payload sources (go/, js/, bin/, the eslint
21
+ preset, npm manifest and lockfile), so editing any rule invalidates prior
22
+ markers and stale clean-results can never mask new findings. The
23
+ orchestrator under py/ is deliberately outside the payload. Hermetic
24
+ wheels (step 6) replace this with the bundled-payload digest.
25
+ """
26
+
27
+ from __future__ import annotations
28
+
29
+ import hashlib
30
+ import json
31
+ import os
32
+ import shutil
33
+ import subprocess
34
+ from dataclasses import dataclass
35
+ from pathlib import Path
36
+
37
+
38
# Environment variable that relocates the cache home (tests use a tmp dir).
CACHE_ROOT_ENV = "TACKBOX_CACHE_HOME"
# On-disk layout version; it is the first path segment under the cache home.
CACHE_VERSION = "v1"
# Marker-count threshold handed to `gc_soft_cap`.
SOFT_CAP = 20000


def default_cache_root() -> Path:
    """Return the versioned cache root, `<cache home>/v1`.

    The cache home is `$TACKBOX_CACHE_HOME` when that variable holds a
    non-empty value, otherwise `~/.cache/tackbox`.
    """
    configured = os.environ.get(CACHE_ROOT_ENV)
    home = Path(configured) if configured else Path.home() / ".cache" / "tackbox"
    return home / CACHE_VERSION
49
+
50
+
51
@dataclass(frozen=True)
class CacheKey:
    """Identity of one cached (unit, engine) result."""

    # Digest of the engine payload; names the directory under the cache root.
    engines_hash: str
    # Digest of the linted unit.
    unit_digest: str
    # Identifier of the engine the result belongs to.
    engine_id: str

    def marker(self, root: Path) -> Path:
        """Return `<root>/<engines_hash>/<unit_digest>.<engine_id>`."""
        filename = f"{self.unit_digest}.{self.engine_id}"
        return root.joinpath(self.engines_hash, filename)
59
+
60
+
61
# Engine payload in dev mode: everything that shapes findings, nothing else.
# Each entry is a path relative to the tackbox root. `engines_hash_dev` hashes
# entries that are files directly and walks entries that are directories.
_DEV_PAYLOAD = (
    "go",
    "js",
    "bin",
    "eslint.config.preset.js",
    "package.json",
    "package-lock.json",
)
70
+
71
+
72
def engines_hash_dev(tackbox_root: Path) -> str:
    """Return the dev-mode engines hash for the checkout at `tackbox_root`.

    Every `_DEV_PAYLOAD` entry that exists contributes one record per file
    (relative posix path + content digest), in a deterministic order, so
    editing any payload file yields a different hash. Entries that do not
    exist are skipped. Files under a `__pycache__` directory are ignored.

    Args:
        tackbox_root: Directory the payload entries are resolved against.

    Returns:
        Hex SHA-256 digest of the payload.
    """
    h = hashlib.sha256()
    # Domain-separation prefix: bump it to invalidate every dev-mode marker.
    h.update(b"dev-payload-v1\n")
    for top in _DEV_PAYLOAD:
        p = tackbox_root / top
        if p.is_file():
            _hash_payload_file(h, top, p)
        elif p.is_dir():
            for f in sorted(p.rglob("*")):
                rel = f.relative_to(tackbox_root)
                # Inspect only the path *below* the root: a `__pycache__`
                # component above the checkout must not silently drop the
                # whole payload from the hash.
                if "__pycache__" in rel.parts or not f.is_file():
                    continue
                _hash_payload_file(h, rel.as_posix(), f)
    return h.hexdigest()
84
+
85
+
86
def _hash_payload_file(h, rel: str, path: Path) -> None:
    """Feed one payload record into `h`.

    The record is the relative path, a tab, the hex SHA-256 of the file at
    `path`, and a newline.
    """
    record = b"".join(
        (rel.encode(), b"\t", sha256_file(path).encode(), b"\n")
    )
    h.update(record)
91
+
92
+
93
def is_cached(key: CacheKey, root: Path) -> bool:
    """Report whether `key` has a marker under `root`; refresh it on a hit.

    A hit re-touches the marker so its mtime can serve as the LRU signal.
    Any OSError while probing or touching counts as a miss, never an error.
    """
    marker_path = key.marker(root)
    try:
        hit = marker_path.is_file()
        if hit:
            marker_path.touch()
    except OSError:
        return False
    return hit
103
+
104
+
105
def mark_clean(key: CacheKey, root: Path) -> None:
    """Create the empty marker for `key` under `root`, best-effort.

    The marker's parent directory is created on demand. Any OSError is
    swallowed so a cache write can never block a run.
    """
    marker_path = key.marker(root)
    bucket = marker_path.parent
    try:
        bucket.mkdir(parents=True, exist_ok=True)
        marker_path.touch()
    except OSError:
        # Deliberate: the cache is an optimisation; a failed write is a no-op.
        return
113
+
114
+
115
def sha256_file(path: Path) -> str:
    """Return the hex SHA-256 of the file at `path`, read in 64 KiB chunks."""
    digest = hashlib.sha256()
    with path.open("rb") as stream:
        while True:
            block = stream.read(65536)
            if not block:
                break
            digest.update(block)
    return digest.hexdigest()
121
+
122
+
123
def sha256_tree(root: Path) -> str:
    """Return a deterministic digest of the directory tree under `root`.

    Regular files are visited in sorted order; each contributes its posix
    path relative to `root` and its content digest, both NUL-terminated.

    Shared by the wheel builder (stamping engines.json) and doctor
    (verifying it) - one implementation so the two can never drift.
    """
    digest = hashlib.sha256()
    regular_files = (
        entry for entry in sorted(root.rglob("*")) if entry.is_file()
    )
    for entry in regular_files:
        rel = entry.relative_to(root).as_posix()
        for part in (rel.encode(), sha256_file(entry).encode()):
            digest.update(part)
            digest.update(b"\0")
    return digest.hexdigest()
138
+
139
+
140
def gc_stale_engines(current: str, root: Path) -> None:
    """Drop every `<engines-hash>/` directory under `root` except `current`.

    Best-effort: a missing, unreadable or concurrently removed `root` is a
    no-op, and deletion errors are ignored. Non-directory entries are left
    alone.

    Args:
        current: Name of the directory to keep.
        root: Versioned cache root holding one directory per engines hash.
    """
    try:
        # Materialise inside the `try`: the listing may fail when the call
        # is made or only once iteration starts, and `root` can disappear
        # under a concurrent run at either point.
        entries = list(root.iterdir())
    except OSError:
        return
    for entry in entries:
        if entry.name != current and entry.is_dir():
            shutil.rmtree(entry, ignore_errors=True)
147
+
148
+
149
def gc_soft_cap(engines_hash: str, cap: int, root: Path) -> None:
    """Trim markers in `<root>/<engines_hash>/` when the count exceeds `cap`.

    Markers are sorted by mtime ascending and dropped from the front until
    the count is at or under `cap`. Files - or the whole directory - that
    vanish under us (concurrent run) are ignored: GC never blocks.

    Args:
        engines_hash: Name of the marker directory to trim.
        cap: Maximum number of markers to keep.
        root: Versioned cache root.
    """
    d = root / engines_hash
    try:
        # A concurrent run with a different engines hash may remove this
        # directory at any moment; a failed listing means nothing to trim.
        markers = [p for p in d.iterdir() if p.is_file()]
    except OSError:
        return
    excess = len(markers) - cap
    if excess <= 0:
        return

    def _mtime(p: Path) -> float:
        # A marker can vanish between iterdir and stat (concurrent run);
        # treat it as oldest so unlink handles it, never raise.
        try:
            return p.stat().st_mtime
        except OSError:
            return 0.0

    markers.sort(key=_mtime)
    for m in markers[:excess]:
        try:
            m.unlink()
        except OSError:
            # Deliberate: already gone or not removable; GC is best-effort.
            pass
176
+
177
+
178
+ # -- erclint package digest -----------------------------------------------
179
+
180
+
181
def erclint_package_digests(
    repo_root: Path, package_dirs: list[str]
) -> dict[str, str]:
    """Compute {package_dir: unit_digest} for erclint units.

    Runs `go list -deps -json` once for all requested packages, filters to
    same-module deps, then hashes each package's own .go files together with
    the .go files of its transitive in-module deps plus go.mod / go.sum. A
    change to any of those files flips the digest.

    Missing / not-a-package entries are dropped from the returned map; the
    caller decides what to do (usually: skip caching for that entry). The
    same applies when `go list` cannot be run or exits non-zero: the result
    is empty, so nothing is cached, but the run is never aborted.

    Args:
        repo_root: Directory `go list` runs in; go.mod / go.sum are read
            from here.
        package_dirs: Package directories relative to `repo_root`. Matching
            is separator-insensitive (`a/b`, `./a/b`, and on Windows `a\\b`
            all name the same package).

    Returns:
        Mapping from each resolvable entry of `package_dirs` (spelled as
        given) to its hex digest.
    """
    if not package_dirs:
        return {}
    args = [f"./{p}" for p in package_dirs]
    try:
        completed = subprocess.run(
            ["go", "list", "-deps", "-json", *args],
            cwd=repo_root,
            capture_output=True,
            text=True,
            # Go emits UTF-8; the locale codec would corrupt non-ASCII
            # paths on Windows and the affected files would be skipped.
            encoding="utf-8",
            check=True,
        )
    except (OSError, subprocess.CalledProcessError):
        # No usable `go`, or it rejected the packages: no digests, so
        # every package is linted and none is cached.
        return {}
    pkgs = list(_iter_json_objects(completed.stdout))

    our_module = _module_path_from_pkgs(pkgs)
    if our_module is None:
        return {}

    in_module: dict[str, dict] = {}
    for p in pkgs:
        if p.get("Standard"):
            continue
        mod = p.get("Module") or {}
        if mod.get("Path") != our_module:
            continue
        dir_ = Path(p["Dir"])
        # NOTE(review): only `GoFiles` is hashed; confirm erclint never
        # inspects test or cgo sources, which `go list` reports elsewhere.
        in_module[p["ImportPath"]] = {
            "dir": dir_,
            "files": [dir_ / f for f in p.get("GoFiles") or []],
            # Deps is the transitive closure per `go list -json`; it is
            # narrowed to in-module packages below.
            "deps": set(p.get("Deps") or []),
        }
    for info in in_module.values():
        info["deps"] = {d for d in info["deps"] if d in in_module}

    file_digest: dict[Path, str] = {}
    for info in in_module.values():
        for f in info["files"]:
            if f not in file_digest and f.is_file():
                file_digest[f] = sha256_file(f)

    go_mod_digest = _optional_file_digest(repo_root / "go.mod")
    go_sum_digest = _optional_file_digest(repo_root / "go.sum")

    # Key by posix-style relative path so the lookup below does not depend
    # on the platform's separator.
    dir_to_import: dict[str, str] = {}
    repo_resolved = repo_root.resolve()
    for import_path, info in in_module.items():
        try:
            rel = info["dir"].resolve().relative_to(repo_resolved)
        except ValueError:
            # Package lives outside the repository tree.
            continue
        dir_to_import[rel.as_posix()] = import_path

    result: dict[str, str] = {}
    for pkg_dir in package_dirs:
        import_path = dir_to_import.get(Path(pkg_dir).as_posix())
        if import_path is None:
            continue
        result[pkg_dir] = _package_digest(
            import_path, in_module, file_digest, go_mod_digest, go_sum_digest
        )
    return result
258
+
259
+
260
def erclint_import_paths(
    repo_root: Path, package_dirs: list[str]
) -> dict[str, str]:
    """Return {package_dir: import_path}. Used to interpret erclint findings.

    Entries that `go list` does not report, or whose directory lies outside
    `repo_root`, are omitted. When `go list` cannot be run or exits
    non-zero the result is empty rather than an exception.

    Args:
        repo_root: Directory `go list` runs in.
        package_dirs: Package directories relative to `repo_root`. Matching
            is separator-insensitive (`a/b`, `./a/b`, and on Windows `a\\b`
            all name the same package).

    Returns:
        Mapping from each resolvable entry of `package_dirs` (spelled as
        given) to its Go import path.
    """
    if not package_dirs:
        return {}
    args = [f"./{p}" for p in package_dirs]
    try:
        completed = subprocess.run(
            ["go", "list", "-json", *args],
            cwd=repo_root,
            capture_output=True,
            text=True,
            # Go emits UTF-8; do not decode with the locale codec.
            encoding="utf-8",
            check=True,
        )
    except (OSError, subprocess.CalledProcessError):
        return {}

    # Normalised posix path -> the caller's spelling(s) of that directory.
    # A dict lookup also replaces the per-package scan of `package_dirs`.
    requested: dict[str, list[str]] = {}
    for pkg_dir in package_dirs:
        requested.setdefault(Path(pkg_dir).as_posix(), []).append(pkg_dir)

    repo_resolved = repo_root.resolve()
    result: dict[str, str] = {}
    for p in _iter_json_objects(completed.stdout):
        try:
            rel = Path(p["Dir"]).resolve().relative_to(repo_resolved)
        except ValueError:
            # Package lives outside the repository tree.
            continue
        for pkg_dir in requested.get(rel.as_posix(), ()):
            result[pkg_dir] = p["ImportPath"]
    return result
286
+
287
+
288
def _package_digest(
    import_path: str,
    in_module: dict[str, dict],
    file_digest: dict[Path, str],
    go_mod_digest: str,
    go_sum_digest: str,
) -> str:
    """Return the unit digest of the package `import_path`.

    Hashed in order: the import path, the package's own files, each
    in-module dependency (sorted by import path) followed by that
    dependency's files, then the go.mod and go.sum digests. Labelled
    separators keep the sections apart.
    """
    own = in_module[import_path]
    hasher = hashlib.sha256()
    hasher.update(import_path.encode() + b"\n---self---\n")
    _hash_files(hasher, own["files"], file_digest)
    hasher.update(b"---deps---\n")
    for dep_path in sorted(own["deps"]):
        hasher.update(dep_path.encode() + b"\n")
        _hash_files(hasher, in_module[dep_path]["files"], file_digest)
    trailer = (
        b"---go.mod---\n"
        + go_mod_digest.encode()
        + b"\n---go.sum---\n"
        + go_sum_digest.encode()
    )
    hasher.update(trailer)
    return hasher.hexdigest()
309
+
310
+
311
+ def _hash_files(h, files, file_digest: dict[Path, str]) -> None:
312
+ for f in sorted(files):
313
+ digest = file_digest.get(f)
314
+ if digest is None:
315
+ continue
316
+ h.update(f.name.encode())
317
+ h.update(b"\t")
318
+ h.update(digest.encode())
319
+ h.update(b"\n")
320
+
321
+
322
+ def _optional_file_digest(path: Path) -> str:
323
+ if not path.is_file():
324
+ return ""
325
+ return sha256_file(path)
326
+
327
+
328
+ def _module_path_from_pkgs(pkgs: list[dict]) -> str | None:
329
+ for p in pkgs:
330
+ if p.get("Standard"):
331
+ continue
332
+ mod = p.get("Module") or {}
333
+ path = mod.get("Path")
334
+ if path:
335
+ return path
336
+ return None
337
+
338
+
339
+ def _iter_json_objects(text: str):
340
+ """Iterate over concatenated top-level JSON objects in `text`."""
341
+ decoder = json.JSONDecoder()
342
+ idx = 0
343
+ n = len(text)
344
+ while idx < n:
345
+ while idx < n and text[idx].isspace():
346
+ idx += 1
347
+ if idx >= n:
348
+ break
349
+ obj, end = decoder.raw_decode(text, idx)
350
+ yield obj
351
+ idx = end
tackbox/cli.py ADDED
@@ -0,0 +1,349 @@
1
+ """tackbox lint / doctor CLI entry point."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import subprocess
7
+ import sys
8
+ from pathlib import Path
9
+
10
+ from . import __version__, cache, doctor
11
+ from .engines import (
12
+ EngineResult,
13
+ EngineSpec,
14
+ active_engines,
15
+ dispatch,
16
+ engines_hash_hermetic,
17
+ is_hermetic,
18
+ parse_erclint_findings,
19
+ resolve_dev_versions,
20
+ resolve_hermetic_versions,
21
+ run_engines,
22
+ )
23
+ from .source_set import (
24
+ PathspecMagicError,
25
+ filter_source_set,
26
+ parse_git_diff_names,
27
+ parse_ls_files_stage,
28
+ parse_ls_files_untracked,
29
+ )
30
+
31
+
32
class ChangedScopeError(ValueError):
    """Raised when the git commands backing --changed / --since fail.

    `main` catches it, prints the message to stderr and returns 2.
    """
34
+
35
# Keys looked up in the resolved versions mapping, in the order
# `_print_banner` renders them as `key=value` pairs.
_BANNER_ORDER = ("erclint", "opengrep", "node", "eslint", "markdownlint")
36
+
37
+
38
def main(argv: list[str] | None = None) -> int:
    """Run the tackbox CLI and return the process exit status.

    `argv` defaults to `sys.argv[1:]`. `lint` returns the status computed
    by `_run_lint`, or 2 when the pathspec or the changed-scope git query
    is rejected. `doctor` prints the banner and returns `doctor.run`'s
    result. Any other command returns 2.
    """
    cli_args = sys.argv[1:] if argv is None else argv
    args = _parse_argv(cli_args)
    command = args.command

    if command == "lint":
        try:
            status = _run_lint(
                args.path,
                no_cache=args.no_cache,
                changed=args.changed,
                since=args.since,
            )
        except (PathspecMagicError, ChangedScopeError) as e:
            print(f"tackbox: {e}", file=sys.stderr)
            status = 2
        return status

    if command == "doctor":
        _print_banner(_tackbox_root())
        return doctor.run(sys.stdout)

    print(f"tackbox: unknown command {command!r}", file=sys.stderr)
    return 2
56
+
57
+
58
+ def _parse_argv(argv: list[str]) -> argparse.Namespace:
59
+ parser = argparse.ArgumentParser(prog="tackbox")
60
+ sub = parser.add_subparsers(dest="command", required=True)
61
+ lint = sub.add_parser("lint", help="lint the source set")
62
+ lint.add_argument("path", nargs="?", default=".", help="scope path (default: .)")
63
+ lint.add_argument(
64
+ "--no-cache",
65
+ action="store_true",
66
+ help="ignore and do not write the (unit, engine) cache",
67
+ )
68
+ lint.add_argument(
69
+ "--changed",
70
+ action="store_true",
71
+ help="restrict to dirty tree (staged + unstaged + untracked)",
72
+ )
73
+ lint.add_argument(
74
+ "--since",
75
+ metavar="<ref>",
76
+ default=None,
77
+ help="restrict to three-dot diff <ref>...HEAD unioned with dirty tree",
78
+ )
79
+ sub.add_parser("doctor", help="verify the hermetic install is functional")
80
+ return parser.parse_args(argv)
81
+
82
+
83
+ def _tackbox_root() -> Path:
84
+ return Path(__file__).resolve().parents[2]
85
+
86
+
87
def _run_lint(scope: str, no_cache: bool, changed: bool, since: str | None) -> int:
    """Lint the source set under `scope` and return the aggregate status.

    Steps: collect the source set (optionally narrowed to the changed
    scope), print source-set warnings and the banner, dispatch files to
    engines, run them (skipping cached units unless caching is off), echo
    each engine's output, and aggregate the exit codes.

    Returns 2 when `scope` matches no files and 0 when no engine has
    anything to do.
    """
    repo_root = _find_repo_root()
    tackbox_root = _tackbox_root()

    wants_changed_scope = changed or since is not None
    changed_scope: set[str] | None = (
        _compute_changed_scope(repo_root, since) if wants_changed_scope else None
    )

    files, warnings = _collect_source_set(repo_root, scope, changed_scope)
    for warning in warnings:
        print(f"tackbox: warning: {warning.reason}: {warning.path}", file=sys.stderr)

    if not files:
        print(
            f"tackbox: scope {scope!r} matched no files in the source set",
            file=sys.stderr,
        )
        return 2

    _print_banner(tackbox_root)

    plan = dispatch(files, active_engines())
    if not plan:
        return 0

    # Self-lint: tackbox lints itself. Cache is disabled so tackbox never
    # self-caches its own bugs (plan: "so that tackbox does not self-cache").
    linting_self = tackbox_root.resolve() == repo_root.resolve()
    use_cache = not (no_cache or linting_self)

    if use_cache:
        cache_root = cache.default_cache_root()
        if is_hermetic():
            engines_hash = engines_hash_hermetic()
        else:
            engines_hash = cache.engines_hash_dev(tackbox_root)
        cache.gc_stale_engines(engines_hash, cache_root)

        filtered_plan, pending = _apply_cache(plan, repo_root, engines_hash, cache_root)
        results = run_engines(filtered_plan, repo_root, tackbox_root)
        _mark_clean_units(results, pending, engines_hash, cache_root)
        cache.gc_soft_cap(engines_hash, cache.SOFT_CAP, cache_root)
    else:
        results = run_engines(plan, repo_root, tackbox_root)

    for r in results:
        sys.stdout.write(f"== {r.engine_id} ==\n")
        # Echo each captured stream to its counterpart, newline-terminated.
        for stream, text in ((sys.stdout, r.stdout), (sys.stderr, r.stderr)):
            if text:
                stream.write(text)
                if not text.endswith("\n"):
                    stream.write("\n")

    return _aggregate_exit(results)
141
+
142
+
143
+ # -- Cache wiring ---------------------------------------------------------
144
+
145
+
146
+ def _apply_cache(
147
+ plan: list[tuple[EngineSpec, list[str]]],
148
+ repo_root: Path,
149
+ engines_hash: str,
150
+ cache_root: Path,
151
+ ) -> tuple[list[tuple[EngineSpec, list[str]]], dict[str, dict]]:
152
+ """Filter cached units out of each engine's args.
153
+
154
+ Returns:
155
+ - filtered_plan: engines that still have uncached args.
156
+ - pending[engine_id] = {
157
+ "arg_digest": [(arg, digest), ...], # uncached args passed to engine
158
+ "arg_ip": {arg: import_path, ...}, # erclint-only mapping
159
+ }
160
+ Used post-run to translate engine output into per-unit success and
161
+ write markers for the clean units.
162
+ """
163
+ filtered_plan: list[tuple[EngineSpec, list[str]]] = []
164
+ pending: dict[str, dict] = {}
165
+ for engine, args in plan:
166
+ arg_digest, extras = _digests_for_engine(engine, args, repo_root)
167
+ uncached: list[tuple[str, str]] = []
168
+ for arg, digest in arg_digest:
169
+ if digest is None:
170
+ uncached.append((arg, digest))
171
+ continue
172
+ key = cache.CacheKey(engines_hash, digest, engine.id)
173
+ if not cache.is_cached(key, cache_root):
174
+ uncached.append((arg, digest))
175
+ pending[engine.id] = {"arg_digest": uncached, **extras}
176
+ if uncached:
177
+ filtered_plan.append((engine, [a for a, _ in uncached]))
178
+ return filtered_plan, pending
179
+
180
+
181
+ def _digests_for_engine(
182
+ engine: EngineSpec, args: list[str], repo_root: Path
183
+ ) -> tuple[list[tuple[str, str]], dict]:
184
+ if engine.id == "erclint":
185
+ digest_map = cache.erclint_package_digests(repo_root, args)
186
+ ip_map = cache.erclint_import_paths(repo_root, args)
187
+ # digest None = lint always, cache never; dropping the arg instead
188
+ # would silently skip linting the package.
189
+ arg_digest = [(a, digest_map.get(a)) for a in args]
190
+ return arg_digest, {"arg_ip": ip_map}
191
+ arg_digest = [(a, cache.sha256_file(repo_root / a)) for a in args]
192
+ return arg_digest, {}
193
+
194
+
195
+ def _mark_clean_units(
196
+ results: list[EngineResult],
197
+ pending: dict[str, dict],
198
+ engines_hash: str,
199
+ cache_root: Path,
200
+ ) -> None:
201
+ for r in results:
202
+ info = pending.get(r.engine_id)
203
+ if not info:
204
+ continue
205
+ clean_args = _clean_args(r, info)
206
+ digest_of = dict(info["arg_digest"])
207
+ for arg in clean_args:
208
+ digest = digest_of.get(arg)
209
+ if digest is None:
210
+ continue
211
+ cache.mark_clean(
212
+ cache.CacheKey(engines_hash, digest, r.engine_id), cache_root
213
+ )
214
+
215
+
216
+ def _clean_args(r: EngineResult, info: dict) -> list[str]:
217
+ args = [a for a, _ in info["arg_digest"]]
218
+ if r.engine_id == "erclint":
219
+ try:
220
+ findings = parse_erclint_findings(r.stdout)
221
+ except ValueError:
222
+ return []
223
+ dirty_ips = {f.get("pkg") for f in findings}
224
+ ip_map = info.get("arg_ip", {})
225
+ # Unknown import path -> cannot attribute findings -> never clean.
226
+ return [
227
+ a for a in args
228
+ if ip_map.get(a) is not None and ip_map[a] not in dirty_ips
229
+ ]
230
+ if r.exit_code == 0:
231
+ return args
232
+ return []
233
+
234
+
235
+ def _aggregate_exit(results: list[EngineResult]) -> int:
236
+ """Aggregate engine exit codes; promote erclint findings to nonzero.
237
+
238
+ erclint in `-json` mode returns exit 0 even when findings exist -
239
+ handover #2 pinned this contract. tackbox is the layer that translates
240
+ findings into a failing aggregate exit.
241
+ """
242
+ max_code = 0
243
+ for r in results:
244
+ code = r.exit_code
245
+ if code == 0 and r.engine_id == "erclint" and _erclint_has_findings(r.stdout):
246
+ code = 1
247
+ if code > max_code:
248
+ max_code = code
249
+ return max_code
250
+
251
+
252
def _erclint_has_findings(stdout: str) -> bool:
    """Return True when erclint's stdout holds findings or cannot be parsed."""
    try:
        findings = parse_erclint_findings(stdout)
    except ValueError:
        # Analyzer-load errors surface as a failing aggregate.
        return True
    return bool(findings)
258
+
259
+
260
def _collect_source_set(
    repo_root: Path, scope: str, changed_scope: set[str] | None = None
):
    """Build the source set for `scope` from git's view of `repo_root`.

    Lists staged entries and untracked, non-ignored files via
    `git ls-files`, then delegates filtering to `filter_source_set`,
    returning its result unchanged. A failing git command raises
    `subprocess.CalledProcessError`.
    """

    def _git_ls_files(*flags: str) -> bytes:
        # NUL-separated output so unusual file names survive parsing.
        completed = subprocess.run(
            ["git", "ls-files", *flags, "-z"],
            cwd=repo_root,
            capture_output=True,
            check=True,
        )
        return completed.stdout

    tracked_raw = _git_ls_files("-s")
    others_raw = _git_ls_files("--others", "--exclude-standard")
    return filter_source_set(
        parse_ls_files_stage(tracked_raw),
        parse_ls_files_untracked(others_raw),
        scope,
        exists=lambda rel: (repo_root / rel).exists(),
        is_symlink=lambda rel: (repo_root / rel).is_symlink(),
        changed_scope=changed_scope,
    )
283
+
284
+
285
def _compute_changed_scope(repo_root: Path, since: str | None) -> set[str]:
    """Union of dirty tree with (optional) three-dot diff against <since>.

    Dirty tree = files that differ from HEAD in the index or worktree,
    plus untracked. Three-dot diff = files changed on this branch since
    the merge-base with <since>; matches the PR-style question "what did
    I change on my branch." A two-dot diff would leak reverse-changes
    when <since> progresses after fork.

    Raises:
        ChangedScopeError: when either `git diff` invocation fails.
    """

    def _diff_names(revspec: str):
        # No check=True: failures are turned into ChangedScopeError below.
        return subprocess.run(
            ["git", "diff", "--name-only", "-z", revspec],
            cwd=repo_root,
            capture_output=True,
        )

    def _stderr_text(proc) -> str:
        return proc.stderr.decode("utf-8", errors="replace").strip()

    head_diff = _diff_names("HEAD")
    if head_diff.returncode != 0:
        # Fresh repo without any commits: HEAD does not resolve. Fail with a
        # clean tackbox message instead of a Python traceback on onboarding.
        err = _stderr_text(head_diff)
        raise ChangedScopeError(
            f"--changed / --since requires at least one commit ({err})"
        )
    scope: set[str] = set(parse_git_diff_names(head_diff.stdout))

    untracked_raw = subprocess.run(
        ["git", "ls-files", "--others", "--exclude-standard", "-z"],
        cwd=repo_root,
        capture_output=True,
        check=True,
    ).stdout
    scope.update(parse_ls_files_untracked(untracked_raw))

    if since is None:
        return scope

    branch_diff = _diff_names(f"{since}...HEAD")
    if branch_diff.returncode != 0:
        err = _stderr_text(branch_diff)
        raise ChangedScopeError(f"--since={since}: {err or 'git diff failed'}")
    scope.update(parse_git_diff_names(branch_diff.stdout))
    return scope
326
+
327
+
328
def _find_repo_root() -> Path:
    """Return the git work-tree root for the current working directory.

    Raises `subprocess.CalledProcessError` when `git rev-parse` fails.
    """
    toplevel = subprocess.run(
        ["git", "rev-parse", "--show-toplevel"],
        capture_output=True,
        check=True,
    ).stdout
    return Path(toplevel.decode().strip())
335
+
336
+
337
def _print_banner(tackbox_root: Path) -> None:
    """Print the one-line version banner to stderr.

    The line names the tackbox version, the engines identity (the hermetic
    payload hash, or `dev`) and one `key=version` pair per `_BANNER_ORDER`
    entry.
    """
    hermetic = is_hermetic()
    if hermetic:
        versions = resolve_hermetic_versions()
    else:
        versions = resolve_dev_versions(tackbox_root)
    engines_id = f"sha256:{engines_hash_hermetic()}" if hermetic else "dev"
    parts = " ".join([f"{name}={versions[name]}" for name in _BANNER_ORDER])
    print(f"tackbox {__version__} engines={engines_id} {parts}", file=sys.stderr)
346
+
347
+
348
# Script entry point: use main()'s return value as the process exit status.
if __name__ == "__main__":
    sys.exit(main())