stemmata 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
stemmata/__init__.py ADDED
@@ -0,0 +1,6 @@
1
from importlib.metadata import PackageNotFoundError
from importlib.metadata import version as _pkg_version

# Start from the hard-coded fallback and overwrite it with the installed
# distribution's version when package metadata is available.
__version__ = "0.0.1"
try:
    __version__ = _pkg_version("stemmata")
except PackageNotFoundError:
    # No installed distribution metadata: keep the fallback assigned above.
    pass
stemmata/__main__.py ADDED
@@ -0,0 +1,3 @@
1
from stemmata.cli import main

# Run the CLI and hand its return value to the interpreter as the exit status.
_exit_status = main()
raise SystemExit(_exit_status)
stemmata/bundle.py ADDED
@@ -0,0 +1,200 @@
1
+ from __future__ import annotations
2
+
3
+ import gzip
4
+ import hashlib
5
+ import io
6
+ import os
7
+ import tarfile
8
+ from dataclasses import dataclass
9
+ from pathlib import Path
10
+
11
+ from stemmata.errors import SchemaError
12
+
13
+
14
# Fixed metadata stamped onto every archive member. Because nothing here
# depends on the build machine or the clock, building the same inputs twice
# yields byte-identical tarballs.
_DETERMINISTIC_MTIME = 0  # shared modification time for all members
_FILE_MODE = 0o644  # permission bits for regular files
_DIR_MODE = 0o755  # permission bits for directories
_BOM_BYTES = b"\xef\xbb\xbf"  # UTF-8 byte order mark
20
+
21
+
22
@dataclass
class BundleMember:
    """A single entry (file or directory) to be written into a bundle tarball."""

    # Path of the entry relative to the archive's ``package/`` root, using
    # forward slashes.
    arcname: str
    # Bytes written as the entry's content.
    data: bytes
    # When true the entry is emitted as a directory rather than a file.
    is_dir: bool = False
27
+
28
+
29
def _normalise_yaml_bytes(raw: bytes, *, file: str) -> bytes:
    """Return ``raw`` without a leading UTF-8 BOM and with LF-only line endings.

    Both ``\\r\\n`` pairs and lone ``\\r`` bytes become ``\\n``. ``file`` is
    accepted for the caller's convenience and is not used here.
    """
    payload = raw[len(_BOM_BYTES):] if raw.startswith(_BOM_BYTES) else raw
    if b"\r" not in payload:
        # Nothing to rewrite; hand back the (possibly BOM-stripped) bytes.
        return payload
    # Collapse CRLF first so a pair does not turn into two newlines.
    return payload.replace(b"\r\n", b"\n").replace(b"\r", b"\n")
36
+
37
+
38
def _is_safe_arcname(arcname: str) -> bool:
    """Tell whether ``arcname`` is an acceptable relative POSIX archive path.

    Rejected: the empty string, absolute paths, anything containing a
    backslash, and any path with an empty, ``.`` or ``..`` segment (which
    also rules out doubled and trailing slashes).
    """
    if not arcname or arcname.startswith("/") or "\\" in arcname:
        return False
    return not any(segment in ("", ".", "..") for segment in arcname.split("/"))
48
+
49
+
50
def collect_members(
    package_root: Path,
    extra_files: list[str],
    yaml_paths: list[str],
) -> list[BundleMember]:
    """Collect tarball members from a publish source directory.

    ``extra_files`` are package-relative files shipped verbatim (e.g.
    ``package.json``, ``README.md``, ``LICENSE``); entries that do not exist
    as regular files are skipped. ``yaml_paths`` are POSIX-relative paths to
    YAML payload files; each must exist and is BOM/CRLF-normalised before
    being bundled.

    Every path is validated with ``_is_safe_arcname`` *before* the filesystem
    is touched, so a hostile path such as ``../secret`` is never stat'ed or
    read. A path listed more than once is bundled only once and the first
    occurrence wins (``extra_files`` are processed before ``yaml_paths``).

    Returns the members sorted by ``arcname``.

    Raises:
        SchemaError: with ``reason="unsafe_path"`` for an unsafe path,
            ``reason="symlink_forbidden"`` when a path is a symlink, or
            ``reason="missing_prompt_file"`` when a YAML payload is absent.
    """
    members: list[BundleMember] = []
    seen: set[str] = set()

    def _require_safe(rel: str) -> None:
        # Reject traversal / absolute / backslash paths up front.
        if not _is_safe_arcname(rel):
            raise SchemaError(
                f"unsafe path in bundle: {rel!r}",
                file=str(package_root / rel),
                field_name="bundle",
                reason="unsafe_path",
            )

    def _reject_symlink(rel: str, full: Path) -> None:
        # Symlinks could smuggle content from outside the package root.
        if full.is_symlink():
            raise SchemaError(
                f"refusing to bundle symlink: {rel!r}",
                file=str(full),
                field_name="bundle",
                reason="symlink_forbidden",
            )

    def _add_file(rel: str, data: bytes) -> None:
        # First occurrence of a path wins; later duplicates are ignored.
        if rel in seen:
            return
        seen.add(rel)
        members.append(BundleMember(arcname=rel, data=data))

    for rel in extra_files:
        _require_safe(rel)
        full = package_root / rel
        if not full.is_file():
            # Optional extras that are absent are silently left out.
            continue
        _reject_symlink(rel, full)
        _add_file(rel, full.read_bytes())

    for rel in yaml_paths:
        _require_safe(rel)
        full = package_root / rel
        if not full.is_file():
            raise SchemaError(
                f"prompt payload file declared by manifest does not exist: {rel}",
                file=str(full),
                field_name="path",
                reason="missing_prompt_file",
            )
        _reject_symlink(rel, full)
        _add_file(rel, _normalise_yaml_bytes(full.read_bytes(), file=str(full)))

    members.sort(key=lambda m: m.arcname)
    return members
119
+
120
+
121
def build_tarball(members: list[BundleMember]) -> bytes:
    """Build a deterministic gzipped tarball with a single ``package/`` root.

    All entries share mtime 0, uid/gid 0, empty owner/group names and mode
    0644 (files) or 0755 (directories). The ``package/`` directory is always
    the first entry; parent directories are emitted once, just before their
    first child. Members are sorted by ``arcname`` here, so the output does
    not depend on the order in which the caller supplies them.

    The tar stream (USTAR format) is wrapped in gzip with mtime 0 and an
    empty filename, so no timestamp or filename is embedded in the header.

    Returns the compressed archive bytes.
    """

    def _header(name: str, kind: bytes, mode: int, size: int = 0) -> tarfile.TarInfo:
        # Single place where the reproducible metadata is stamped on an entry.
        info = tarfile.TarInfo(name=name)
        info.type = kind
        info.mode = mode
        info.size = size
        info.mtime = _DETERMINISTIC_MTIME
        info.uid = 0
        info.gid = 0
        info.uname = ""
        info.gname = ""
        return info

    tar_buf = io.BytesIO()
    with tarfile.open(fileobj=tar_buf, mode="w", format=tarfile.USTAR_FORMAT) as tf:
        emitted_dirs: set[str] = set()

        def _ensure_dir(dirpath: str) -> None:
            # Emit ``dirpath`` and any missing ancestors, outermost first.
            if not dirpath or dirpath in emitted_dirs:
                return
            _ensure_dir(dirpath.rpartition("/")[0])
            tf.addfile(_header(f"package/{dirpath}/", tarfile.DIRTYPE, _DIR_MODE))
            emitted_dirs.add(dirpath)

        # Always emit the root package directory itself as the first entry.
        tf.addfile(_header("package/", tarfile.DIRTYPE, _DIR_MODE))

        # Sort locally so determinism does not rely on the caller's ordering.
        for m in sorted(members, key=lambda member: member.arcname):
            if m.is_dir:
                _ensure_dir(m.arcname)
                continue
            _ensure_dir(m.arcname.rpartition("/")[0])
            info = _header(
                f"package/{m.arcname}", tarfile.REGTYPE, _FILE_MODE, len(m.data)
            )
            tf.addfile(info, io.BytesIO(m.data))

    gz_buf = io.BytesIO()
    # mtime=0 + empty filename + no comment -> reproducible gzip header.
    with gzip.GzipFile(filename="", mode="wb", fileobj=gz_buf, mtime=0) as gz:
        gz.write(tar_buf.getvalue())
    return gz_buf.getvalue()
184
+
185
+
186
def integrity_sha512(data: bytes) -> str:
    """Return ``"sha512-"`` followed by the base64-encoded SHA-512 digest of ``data``."""
    from base64 import b64encode

    digest = hashlib.sha512(data).digest()
    encoded = b64encode(digest).decode("ascii")
    return f"sha512-{encoded}"
189
+
190
+
191
def shasum_sha1(data: bytes) -> str:
    """Return the lowercase hexadecimal SHA-1 digest of ``data``."""
    hasher = hashlib.sha1()
    hasher.update(data)
    return hasher.hexdigest()
193
+
194
+
195
def tarball_filename(name: str, version: str) -> str:
    """Return the ``.tgz`` filename for a package.

    For a scoped name (``@scope/pkg``) only the part after the first slash is
    used; any other name is used unchanged.
    """
    scope, slash, remainder = name.partition("/")
    is_scoped = bool(slash) and scope.startswith("@")
    simple = remainder if is_scoped else name
    return f"{simple}-{version}.tgz"
stemmata/cache.py ADDED
@@ -0,0 +1,238 @@
1
+ from __future__ import annotations
2
+
3
+ import contextlib
4
+ import hashlib
5
+ import os
6
+ import shutil
7
+ import sys
8
+ import tarfile
9
+ import tempfile
10
+ import time
11
+ import uuid
12
+ from dataclasses import dataclass
13
+ from pathlib import Path
14
+ from typing import Iterator
15
+
16
+ from stemmata.errors import CacheError, SchemaError
17
+
18
+
19
# Default ceiling (256 MiB) on the total size of regular files in a tarball;
# used as the default for ``Cache.install_tarball(max_decompressed=...)``.
MAX_DECOMPRESSED_BYTES = 256 * 1024**2
20
+
21
+
22
def default_cache_dir() -> Path:
    """Return the cache root directory.

    A non-empty ``PROMPT_CLI_CACHE_DIR`` environment variable takes
    precedence; otherwise the location is ``~/.cache/stemmata``.
    """
    configured = os.environ.get("PROMPT_CLI_CACHE_DIR")
    return Path(configured) if configured else Path.home() / ".cache" / "stemmata"
27
+
28
+
29
@dataclass
class Cache:
    """On-disk package cache rooted at ``root``.

    Constructing an instance creates ``root`` together with three
    sub-directories: ``packages/`` (installed packages, one directory per
    name and version), ``locks/`` (per-package lock files) and ``staging/``
    (scratch space for installs and evictions).
    """

    root: Path

    def __post_init__(self) -> None:
        """Create the cache directory layout if it does not exist yet."""
        self.root.mkdir(parents=True, exist_ok=True)
        for sub in ("packages", "locks", "staging"):
            (self.root / sub).mkdir(exist_ok=True)

    def package_dir(self, name: str, version: str) -> Path:
        """Return the directory that holds (or would hold) ``name`` at ``version``."""
        return self.root / "packages" / _safe_dirname(name) / version

    def has_package(self, name: str, version: str) -> bool:
        """Tell whether ``name`` at ``version`` is present in the cache."""
        return self.package_dir(name, version).is_dir()

    @contextlib.contextmanager
    def lock(self, name: str, version: str) -> Iterator[None]:
        """Hold an exclusive lock on the lock file for ``name@version``.

        The lock is attempted in non-blocking mode every 50 ms for up to 60
        seconds. ``msvcrt.locking`` on the file's first byte is used on
        Windows and ``fcntl.flock`` elsewhere.

        Raises:
            CacheError: if the lock cannot be obtained before the deadline.
        """
        safe = _safe_dirname(f"{name}@{version}")
        lock_path = self.root / "locks" / f"{safe}.lock"
        lock_path.parent.mkdir(parents=True, exist_ok=True)
        fd = os.open(str(lock_path), os.O_CREAT | os.O_RDWR)
        deadline = time.monotonic() + 60.0
        try:
            if sys.platform == "win32":
                import msvcrt

                # The byte-range lock below covers byte 0, so make sure the
                # file contains at least one byte.
                if os.fstat(fd).st_size < 1:
                    os.write(fd, b"\0")
                while True:
                    os.lseek(fd, 0, os.SEEK_SET)
                    try:
                        msvcrt.locking(fd, msvcrt.LK_NBLCK, 1)
                        break
                    except OSError:
                        if time.monotonic() > deadline:
                            raise CacheError(str(lock_path), "timed out waiting for lock")
                        time.sleep(0.05)
                try:
                    yield
                finally:
                    os.lseek(fd, 0, os.SEEK_SET)
                    # Unlock failures are ignored; the descriptor is closed below.
                    with contextlib.suppress(OSError):
                        msvcrt.locking(fd, msvcrt.LK_UNLCK, 1)
            else:
                import fcntl

                while True:
                    try:
                        fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
                        break
                    except OSError:  # includes BlockingIOError
                        if time.monotonic() > deadline:
                            raise CacheError(str(lock_path), "timed out waiting for lock")
                        time.sleep(0.05)
                try:
                    yield
                finally:
                    fcntl.flock(fd, fcntl.LOCK_UN)
        finally:
            os.close(fd)

    def install_tarball(
        self,
        name: str,
        version: str,
        tarball_bytes: bytes,
        *,
        max_decompressed: int = MAX_DECOMPRESSED_BYTES,
        force: bool = False,
    ) -> Path:
        """Extract ``tarball_bytes`` into the cache and return the package directory.

        If the package is already installed and ``force`` is false, the
        existing directory is returned untouched. Otherwise the archive is
        unpacked in a private staging directory and then renamed into place;
        with ``force`` an existing install is moved aside first and deleted
        once the new one is in place.

        The staging directory is removed on every exit path, including
        unexpected errors. If the final rename fails, the previous install
        (when there was one) is moved back before the error propagates.

        Raises:
            SchemaError, CacheError: propagated from ``_extract_tarball``.
            OSError: on filesystem failures while writing or renaming.
        """
        target = self.package_dir(name, version)
        if target.is_dir() and not force:
            return target
        staging = self.root / "staging" / f"{_safe_dirname(name)}-{version}-{uuid.uuid4().hex}"
        staging.mkdir(parents=True, exist_ok=False)
        evicted: Path | None = None
        try:
            archive = staging / "archive.tgz"
            archive.write_bytes(tarball_bytes)
            _extract_tarball(archive, staging / "pkg", max_decompressed=max_decompressed)
            target.parent.mkdir(parents=True, exist_ok=True)
            if target.exists():
                evicted = self.root / "staging" / f"evict-{uuid.uuid4().hex}"
                os.replace(target, evicted)
            try:
                os.replace(staging / "pkg", target)
            except OSError:
                if evicted is not None:
                    # Best-effort rollback. Whether or not it succeeds, the
                    # old copy must not be deleted by the cleanup below.
                    with contextlib.suppress(OSError):
                        os.replace(evicted, target)
                    evicted = None
                raise
        finally:
            shutil.rmtree(staging, ignore_errors=True)
            if evicted is not None:
                shutil.rmtree(evicted, ignore_errors=True)
        return target

    def evict(self, name: str, version: str) -> tuple[bool, int]:
        """Remove one package from the cache.

        Returns ``(True, bytes_freed)`` when the package was removed, or
        ``(False, 0)`` when it was not present or its lock could not be
        obtained.
        """
        target = self.package_dir(name, version)
        if not target.exists():
            return False, 0
        try:
            with self.lock(name, version):
                # Re-check under the lock: the package may have been removed
                # while this call was waiting.
                if not target.exists():
                    return False, 0
                size = _dir_size(target)
                # Rename out of ``packages/`` first so the package disappears
                # in one step, then delete the renamed tree.
                staging = self.root / "staging" / f"evict-{uuid.uuid4().hex}"
                os.replace(target, staging)
                shutil.rmtree(staging, ignore_errors=True)
                return True, size
        except CacheError:
            return False, 0

    def clear_all(self) -> tuple[int, int]:
        """Evict every cached package.

        Returns ``(packages_removed, bytes_freed)``. Name directories left
        empty afterwards are removed as well.
        """
        pkgs_root = self.root / "packages"
        removed = 0
        bytes_freed = 0
        if not pkgs_root.exists():
            return 0, 0
        for scope_dir in pkgs_root.iterdir():
            if not scope_dir.is_dir():
                continue
            name = _unsafe_dirname(scope_dir.name)
            for version_dir in scope_dir.iterdir():
                if not version_dir.is_dir():
                    continue
                ok, size = self.evict(name, version_dir.name)
                if ok:
                    removed += 1
                    bytes_freed += size
            with contextlib.suppress(OSError):
                if not any(scope_dir.iterdir()):
                    scope_dir.rmdir()
        return removed, bytes_freed
152
+
153
+
154
def _safe_dirname(name: str) -> str:
    """Encode a package name as a single path component.

    ``/`` becomes ``__`` and ``@`` becomes ``AT_``.
    """
    without_slashes = name.replace("/", "__")
    return without_slashes.replace("@", "AT_")
156
+
157
+
158
def _unsafe_dirname(name: str) -> str:
    """Decode a directory name produced by ``_safe_dirname``.

    ``__`` becomes ``/`` and ``AT_`` becomes ``@``. Names that originally
    contained ``__`` or ``AT_`` do not round-trip.
    """
    with_slashes = name.replace("__", "/")
    return with_slashes.replace("AT_", "@")
160
+
161
+
162
def _dir_size(path: Path) -> int:
    """Return the combined size in bytes of all files found under ``path``.

    Files whose size cannot be read are left out of the total.
    """
    size = 0
    for dirpath, _subdirs, filenames in os.walk(path):
        for filename in filenames:
            try:
                size += os.path.getsize(os.path.join(dirpath, filename))
            except OSError:
                # Unreadable or vanished entry: skip it.
                pass
    return size
169
+
170
+
171
+ def _extract_tarball(tar_path: Path, dest: Path, *, max_decompressed: int) -> None:
172
+ dest.mkdir(parents=True, exist_ok=True)
173
+ total = 0
174
+ try:
175
+ with tarfile.open(tar_path, mode="r:gz") as tf:
176
+ try:
177
+ tf.extraction_filter = tarfile.data_filter # type: ignore[attr-defined]
178
+ except AttributeError:
179
+ raise CacheError(str(dest), "python 3.12+ required for safe tar extraction")
180
+ members: list[tarfile.TarInfo] = []
181
+ for m in tf.getmembers():
182
+ name = m.name.replace("\\", "/")
183
+ if name.startswith("/") or ".." in name.split("/"):
184
+ raise SchemaError(
185
+ f"tarball contains unsafe path: {m.name!r}",
186
+ file=str(tar_path),
187
+ field_name="tarball",
188
+ reason="path_traversal",
189
+ )
190
+ if m.issym() or m.islnk():
191
+ raise SchemaError(
192
+ f"tarball contains symlink/hardlink: {m.name!r}",
193
+ file=str(tar_path),
194
+ field_name="tarball",
195
+ reason="symlink_forbidden",
196
+ )
197
+ if m.isdev() or m.isfifo():
198
+ raise SchemaError(
199
+ f"tarball contains device/FIFO: {m.name!r}",
200
+ file=str(tar_path),
201
+ field_name="tarball",
202
+ reason="device_forbidden",
203
+ )
204
+ if m.isfile():
205
+ if m.mode & 0o111:
206
+ raise SchemaError(
207
+ f"tarball file has executable bit: {m.name!r}",
208
+ file=str(tar_path),
209
+ field_name="tarball",
210
+ reason="exec_bit",
211
+ )
212
+ total += m.size
213
+ if total > max_decompressed:
214
+ raise CacheError(str(dest), "decompression size limit exceeded")
215
+ members.append(m)
216
+
217
+ for m in members:
218
+ name = m.name.replace("\\", "/")
219
+ if name.startswith("package/"):
220
+ relname = name[len("package/"):]
221
+ else:
222
+ relname = name
223
+ if not relname:
224
+ continue
225
+ target = dest / relname
226
+ target.parent.mkdir(parents=True, exist_ok=True)
227
+ if m.isdir():
228
+ target.mkdir(exist_ok=True)
229
+ continue
230
+ extracted = tf.extractfile(m)
231
+ if extracted is None:
232
+ continue
233
+ data = extracted.read()
234
+ if target.exists():
235
+ target.unlink()
236
+ target.write_bytes(data)
237
+ except tarfile.TarError as e:
238
+ raise CacheError(str(tar_path), f"invalid tarball: {e}")