PyPI - stashfs - Versions diffs - 0.1.0__py3-none-any.whl - Mend

stashfs 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

stashfs/__init__.py +80 -0
stashfs/cli.py +143 -0
stashfs/container.py +138 -0
stashfs/crypto.py +125 -0
stashfs/file_index.py +71 -0
stashfs/fuse_app.py +366 -0
stashfs/legacy_fs.py +101 -0
stashfs/optimize.py +190 -0
stashfs/slot_table.py +179 -0
stashfs/storage.py +212 -0
stashfs/volume.py +321 -0
stashfs-0.1.0.dist-info/METADATA +11 -0
stashfs-0.1.0.dist-info/RECORD +15 -0
stashfs-0.1.0.dist-info/WHEEL +4 -0
stashfs-0.1.0.dist-info/entry_points.txt +2 -0

stashfs/__init__.py ADDED Viewed

@@ -0,0 +1,80 @@
+"""stashfs package - FUSE-based single-file filesystem (formerly ``fly`` / ``fyl``).
+Public API is re-exported here so callers and tests can continue to do
+``from stashfs import ...``. Implementation is split across submodules to
+leave room for the crypto stack.
+"""
+from stashfs.container import (
+    CHUNK_FRAME_SIZE,
+    CHUNK_PAYLOAD_SIZE,
+    DATA_START,
+    HEADER_SIZE,
+    N_SLOTS,
+    SLOT_SIZE,
+    SLOT_TABLE_SIZE,
+    Container,
+    ContainerCorrupt,
+)
+from stashfs.crypto import KDF, KEY_SIZE, NONCE_SIZE, TAG_SIZE, AEADChunk, KDFParams
+from stashfs.file_index import FileIndexCorrupt, VolumeFile
+from stashfs.fuse_app import (
+    TIME_PAT,
+    MyStat,
+    Stash,
+    auto_unmount,
+    call_fuse_exit,
+    log,
+    main,
+    mount,
+    parse_args,
+    update_log_level,
+)
+from stashfs.legacy_fs import MAGIC_BYTES, FileRecord, FileStructure
+from stashfs.slot_table import FLAG_FREE, FLAG_OCCUPIED, PasswordDoesNotMatch, SlotInfo, SlotTable
+from stashfs.storage import CoverStorage, FileWrapper, Storage
+from stashfs.volume import Volume, VolumeCorrupt
+# Explicit public surface of the package: every name imported above is listed
+# here so ``from stashfs import ...`` keeps working after the split into
+# submodules. Entries are ordered ALL_CAPS first, then CamelCase, then
+# lowercase, each group alphabetically.
+__all__ = [
+    'CHUNK_FRAME_SIZE',
+    'CHUNK_PAYLOAD_SIZE',
+    'DATA_START',
+    'FLAG_FREE',
+    'FLAG_OCCUPIED',
+    'HEADER_SIZE',
+    'KDF',
+    'KEY_SIZE',
+    'MAGIC_BYTES',
+    'NONCE_SIZE',
+    'N_SLOTS',
+    'SLOT_SIZE',
+    'SLOT_TABLE_SIZE',
+    'TAG_SIZE',
+    'TIME_PAT',
+    'AEADChunk',
+    'Container',
+    'ContainerCorrupt',
+    'CoverStorage',
+    'FileIndexCorrupt',
+    'FileRecord',
+    'FileStructure',
+    'FileWrapper',
+    'KDFParams',
+    'MyStat',
+    'PasswordDoesNotMatch',
+    'SlotInfo',
+    'SlotTable',
+    'Stash',
+    'Storage',
+    'Volume',
+    'VolumeCorrupt',
+    'VolumeFile',
+    'auto_unmount',
+    'call_fuse_exit',
+    'log',
+    'main',
+    'mount',
+    'parse_args',
+    'update_log_level',
+]

stashfs/cli.py ADDED Viewed

@@ -0,0 +1,143 @@
+"""Unified ``stashfs`` command-line entry point.
+Exposes two subcommands:
+* ``stashfs mount <backing> [mountpoint]`` — mount the FUSE filesystem.
+* ``stashfs optimize <backing>`` — rebuild the backing file, reclaiming
+  space left behind by deletions, overwrites, and renames.
+Installed as a console script via ``[project.scripts]`` so users can
+run it as ``stashfs ...`` after ``uv tool install .``.
+A bare path to an existing file (``stashfs /path/to/backing``) is treated
+as shorthand for ``stashfs mount /path/to/backing`` — mounting is the
+overwhelmingly common case and typing ``mount`` every time is friction.
+"""
+from __future__ import annotations
+import argparse
+import getpass
+import logging
+import sys
+from pathlib import Path
+from stashfs.crypto import KDF
+from stashfs.fuse_app import _configure_logging, run_mount
+log = logging.getLogger('stashfs.cli')
+def build_parser() -> argparse.ArgumentParser:
+    """Assemble the ``stashfs`` argument parser.
+    Two subcommands are registered, and one of them is required:
+    * ``mount`` -- backing file, optional mountpoint, ``--ttl``, ``--debug``.
+    * ``optimize`` -- backing file, repeatable ``--password``,
+      ``--drop-locked``, ``--debug``.
+    The backing-file argument of both subcommands is resolved to an
+    absolute ``Path`` during parsing.
+    """
+    root = argparse.ArgumentParser(
+        prog='stashfs',
+        description='Encrypted single-file FUSE filesystem',
+    )
+    commands = root.add_subparsers(dest='command', required=True)
+    # ``mount``: positional backing file and optional mountpoint.
+    mount_cmd = commands.add_parser('mount', help='Mount the filesystem')
+    mount_cmd.add_argument('fname', type=lambda x: Path(x).resolve())
+    mount_cmd.add_argument('mountpoint', nargs='?', default='/tmp/aaa', type=Path)
+    mount_cmd.add_argument('--ttl', type=int, default=300)
+    mount_cmd.add_argument('--debug', action='store_true')
+    # ``optimize``: positional backing file plus password handling flags.
+    optimize_cmd = commands.add_parser('optimize', help='Rebuild the backing file to reclaim space')
+    optimize_cmd.add_argument('fname', type=lambda x: Path(x).resolve())
+    password_help = 'Password for an occupied slot (repeatable). Omit to be prompted interactively.'
+    optimize_cmd.add_argument('--password', action='append', default=[], help=password_help)
+    drop_help = 'Dangerous: purge any occupied slot whose password is unknown.'
+    optimize_cmd.add_argument('--drop-locked', action='store_true', help=drop_help)
+    optimize_cmd.add_argument('--debug', action='store_true')
+    return root
+_SUBCOMMANDS = frozenset({'mount', 'optimize'})
+def _inject_implicit_mount(argv: list[str] | None) -> list[str] | None:
+    """Turn ``stashfs <existing-path> ...`` into ``stashfs mount <existing-path> ...``.
+    ``argv`` defaults to ``sys.argv[1:]`` when it is ``None``. The rewrite
+    happens only when the first argument is not a known subcommand, does
+    not start with ``-``, and names a path that exists. In every other
+    case the argument list is returned unchanged so argparse reports
+    errors against what the user actually typed.
+    """
+    args = sys.argv[1:] if argv is None else argv
+    if not args:
+        return args
+    head = args[0]
+    could_be_path = head not in _SUBCOMMANDS and not head.startswith('-')
+    if could_be_path and Path(head).exists():
+        return ['mount', *args]
+    return args
+def main(argv: list[str] | None = None) -> int:
+    """Parse the command line and run the selected subcommand.
+    ``argv`` is passed through ``_inject_implicit_mount`` before parsing.
+    Logging is configured from the ``--debug`` flag. Returns the exit
+    status of the subcommand handler.
+    """
+    cli = build_parser()
+    namespace = cli.parse_args(_inject_implicit_mount(argv))
+    debug_enabled = getattr(namespace, 'debug', False)
+    _configure_logging(debug_enabled)
+    handlers = {'mount': _run_mount, 'optimize': _run_optimize}
+    handler = handlers.get(namespace.command)
+    if handler is not None:
+        return handler(namespace)
+    # ``error`` exits the process; the return below only runs if it does not.
+    cli.error(f'unknown command {namespace.command!r}')
+    return 2
+def _build_kdf(_args: argparse.Namespace) -> KDF:
+    """Return a default-configured ``KDF``; the parsed arguments are not used.
+    Kept as its own function so tests can replace it with a faster KDF.
+    """
+    default_kdf = KDF()
+    return default_kdf
+def _run_mount(args: argparse.Namespace) -> int:
+    """Pass the parsed arguments to ``run_mount`` and return exit status 0."""
+    success = 0
+    run_mount(args)
+    return success
+def _run_optimize(args: argparse.Namespace) -> int:
+    """Run ``stashfs optimize`` and return the process exit status.
+    Returns 1, after printing a message to stderr, when the backing file
+    does not exist or ``optimize`` raises ``OptimizeError``. Otherwise
+    prints a one-line summary of the rebuild and returns 0. If no
+    ``--password`` option was supplied, passwords are read interactively.
+    """
+    from stashfs.optimize import OptimizeError, optimize
+    backing = args.fname
+    if not backing.exists():
+        print(f'error: {backing} does not exist', file=sys.stderr)
+        return 1
+    passwords = list(args.password)
+    if not passwords:
+        # Interactive mode: collect passwords until the first empty line.
+        def ask() -> str:
+            return getpass.getpass('Password (enter on empty line to finish): ')
+        passwords.extend(iter(ask, ''))
+        if not passwords:
+            # The very first entry was empty: treat that as "try the empty
+            # password only" (the empty string is a valid slot-0 password).
+            passwords.append('')
+    try:
+        report = optimize(backing, passwords, kdf=_build_kdf(args), drop_locked=args.drop_locked)
+    except OptimizeError as exc:
+        print(f'error: {exc}', file=sys.stderr)
+        return 1
+    summary = (
+        f'optimize: {backing} {report.old_size} -> {report.new_size} bytes '
+        f'(reclaimed {report.reclaimed}), slots rebuilt={report.rebuilt_slots} '
+        f'dropped={report.dropped_slots}'
+    )
+    print(summary)
+    return 0
+# When executed as a script, run the CLI and use main()'s return value as
+# the process exit status.
+if __name__ == '__main__':
+    raise SystemExit(main())

stashfs/container.py ADDED Viewed

@@ -0,0 +1,138 @@
+"""Fixed-layout container over a ``Storage``.
+The container owns the on-disk layout decisions that are *not* crypto:
+where the header lives, how big a slot is, how big a chunk frame is,
+and how chunks are addressed. The ``Volume`` layer will stack crypto on
+top; the ``SlotTable`` layer will interpret the 640-byte slot-table blob.
+Layout (offsets in bytes)::
+    0    : 16B  global_salt (random, fed to Argon2id)
+    16   : 640B slot_table (8 slots x 80B)
+    656  : chunk[0]
+    4780 : chunk[1]
+    ...
+A chunk frame is always exactly ``CHUNK_FRAME_SIZE`` bytes: Volume will
+interpret it as ``12B nonce || 4096B ciphertext || 16B tag``. The
+container does not decode the frame; it just stores and retrieves them.
+"""
+from __future__ import annotations
+import os
+from pathlib import Path
+from stashfs.storage import FileWrapper, Storage
+# Header at offset 0: the 16-byte global salt.
+HEADER_SIZE = 16
+# The slot table is N_SLOTS fixed-size slots laid out back to back
+# directly after the header (8 x 80 = 640 bytes).
+SLOT_SIZE = 80
+N_SLOTS = 8
+SLOT_TABLE_SIZE = SLOT_SIZE * N_SLOTS
+# Payload bytes carried by one chunk.
+CHUNK_PAYLOAD_SIZE = 4096
+CHUNK_FRAME_SIZE = CHUNK_PAYLOAD_SIZE + 12 + 16  # frame = 12B nonce + 4096B ciphertext + 16B tag (4124 bytes)
+# Offset of chunk[0]: everything before it is header + slot table (656).
+DATA_START = HEADER_SIZE + SLOT_TABLE_SIZE
+class ContainerCorrupt(Exception):
+    """Raised when the backing storage cannot be interpreted as a container.
+    ``Container`` raises it for a non-empty backing that is shorter than the
+    header plus slot table, or whose chunk region is not a whole number of
+    chunk frames.
+    """
+class Container:
+    """Chunk-framed view over any ``Storage``.
+    The container only knows the fixed layout: a header, a slot table, and
+    then a run of ``CHUNK_FRAME_SIZE``-byte frames. It never interprets the
+    bytes it stores. Constructing a container over an empty backing writes
+    a random header and slot table; putting real cryptographic wrappings
+    into the slots is the caller's job (typically ``Volume``).
+    """
+    def __init__(self, storage: Storage) -> None:
+        self._storage = storage
+        self._ensure_initialised()
+    @classmethod
+    def open_path(cls, path: Path) -> Container:
+        """Build a container backed by a ``FileWrapper`` around ``path``."""
+        backing = FileWrapper(path)
+        return cls(backing)
+    @property
+    def storage(self) -> Storage:
+        """The ``Storage`` this container reads from and writes to."""
+        return self._storage
+    @staticmethod
+    def _slot_offset(index: int) -> int:
+        """Byte offset of slot ``index`` within the backing."""
+        return HEADER_SIZE + index * SLOT_SIZE
+    @staticmethod
+    def _chunk_offset(index: int) -> int:
+        """Byte offset of chunk frame ``index`` within the backing."""
+        return DATA_START + index * CHUNK_FRAME_SIZE
+    def _ensure_initialised(self) -> None:
+        """Initialise an empty backing or validate the size of an existing one.
+        Raises ``ContainerCorrupt`` when a non-empty backing is too short to
+        hold the header and slot table, or when the bytes after them are not
+        a whole number of chunk frames.
+        """
+        backing_size = self._storage.size()
+        if backing_size == 0:
+            # Fill header + slot table with random bytes so unused slots
+            # carry no visible structure, then force each slot's first
+            # byte (its free/occupied flag) to 0x00 so a free slot can
+            # never read as occupied (0x01) by chance.
+            prefix = bytearray(os.urandom(HEADER_SIZE + SLOT_TABLE_SIZE))
+            for flag_offset in range(HEADER_SIZE, HEADER_SIZE + SLOT_TABLE_SIZE, SLOT_SIZE):
+                prefix[flag_offset] = 0x00
+            self._storage.write_end(bytes(prefix))
+        elif backing_size < DATA_START:
+            raise ContainerCorrupt(f'backing is {backing_size} bytes, need at least {DATA_START} for header+slot_table')
+        else:
+            leftover = (backing_size - DATA_START) % CHUNK_FRAME_SIZE
+            if leftover != 0:
+                raise ContainerCorrupt(f'chunk region is not a multiple of {CHUNK_FRAME_SIZE} (extra {leftover} bytes)')
+    def read_header(self) -> bytes:
+        """Read ``HEADER_SIZE`` bytes from offset 0."""
+        return self._storage.read(HEADER_SIZE, 0)
+    def write_header(self, header: bytes) -> None:
+        """Overwrite the header; ``ValueError`` unless it is ``HEADER_SIZE`` bytes."""
+        if len(header) != HEADER_SIZE:
+            raise ValueError(f'header must be {HEADER_SIZE} bytes')
+        self._storage.write(0, header)
+    def read_slot_table(self) -> bytes:
+        """Read the whole slot table, which starts right after the header."""
+        table_offset = HEADER_SIZE
+        return self._storage.read(SLOT_TABLE_SIZE, table_offset)
+    def write_slot_table(self, blob: bytes) -> None:
+        """Overwrite the whole slot table; ``ValueError`` on a wrong-sized blob."""
+        if len(blob) != SLOT_TABLE_SIZE:
+            raise ValueError(f'slot_table must be {SLOT_TABLE_SIZE} bytes')
+        table_offset = HEADER_SIZE
+        self._storage.write(table_offset, blob)
+    def read_slot(self, index: int) -> bytes:
+        """Read slot ``index``; ``IndexError`` if it is out of range."""
+        self._check_slot_index(index)
+        return self._storage.read(SLOT_SIZE, self._slot_offset(index))
+    def write_slot(self, index: int, blob: bytes) -> None:
+        """Overwrite slot ``index``.
+        Raises ``IndexError`` for a bad index, then ``ValueError`` if ``blob``
+        is not exactly ``SLOT_SIZE`` bytes.
+        """
+        self._check_slot_index(index)
+        if len(blob) != SLOT_SIZE:
+            raise ValueError(f'slot must be {SLOT_SIZE} bytes')
+        self._storage.write(self._slot_offset(index), blob)
+    def num_chunks(self) -> int:
+        """Number of chunk frames, derived from the current backing size."""
+        chunk_region = self._storage.size() - DATA_START
+        return chunk_region // CHUNK_FRAME_SIZE
+    def read_chunk(self, index: int) -> bytes:
+        """Read chunk frame ``index``; ``IndexError`` if it is out of range."""
+        self._check_chunk_index(index)
+        return self._storage.read(CHUNK_FRAME_SIZE, self._chunk_offset(index))
+    def write_chunk(self, index: int, frame: bytes) -> None:
+        """Overwrite an existing chunk frame in place.
+        Raises ``IndexError`` for a bad index, then ``ValueError`` if ``frame``
+        is not exactly ``CHUNK_FRAME_SIZE`` bytes.
+        """
+        self._check_chunk_index(index)
+        if len(frame) != CHUNK_FRAME_SIZE:
+            raise ValueError(f'chunk frame must be {CHUNK_FRAME_SIZE} bytes')
+        self._storage.write(self._chunk_offset(index), frame)
+    def append_chunk(self, frame: bytes) -> int:
+        """Append a frame at the end of the backing and return its index."""
+        if len(frame) != CHUNK_FRAME_SIZE:
+            raise ValueError(f'chunk frame must be {CHUNK_FRAME_SIZE} bytes')
+        # The new frame's index is the chunk count before the append.
+        new_index = self.num_chunks()
+        self._storage.write_end(frame)
+        return new_index
+    def _check_slot_index(self, index: int) -> None:
+        """Raise ``IndexError`` unless ``0 <= index < N_SLOTS``."""
+        if index < 0 or index >= N_SLOTS:
+            raise IndexError(f'slot index {index} out of range [0, {N_SLOTS})')
+    def _check_chunk_index(self, index: int) -> None:
+        """Raise ``IndexError`` unless ``index`` names an existing chunk."""
+        total = self.num_chunks()
+        if index < 0 or index >= total:
+            raise IndexError(f'chunk index {index} out of range [0, {total})')

stashfs/crypto.py ADDED Viewed

@@ -0,0 +1,125 @@
+"""Crypto primitives for the encrypted container.
+Two small, independently testable pieces:
+* ``KDF``      -- Argon2id for the slow password -> master key step,
+                  HKDF-SHA256 for the fast master -> per-slot key step.
+* ``AEADChunk`` -- AES-256-GCM seal/open with nonce + tag framed inline.
+We use AES-256-GCM (32-byte keys) throughout. The plan document mentions
+"AES-128-GCM" in one bullet but the same bullet also specifies a 32-byte
+volume key; AES-256 is the consistent choice and gives us extra margin
+for free.
+"""
+from __future__ import annotations
+import os
+import struct
+from dataclasses import dataclass
+from argon2.low_level import Type, hash_secret_raw
+from cryptography.exceptions import InvalidTag
+from cryptography.hazmat.primitives import hashes
+from cryptography.hazmat.primitives.ciphers.aead import AESGCM
+from cryptography.hazmat.primitives.kdf.hkdf import HKDF
+# AES-256 key length in bytes; also the length of the Argon2id master key.
+KEY_SIZE = 32
+# AES-GCM nonce and tag lengths in bytes, as framed around each ciphertext.
+NONCE_SIZE = 12
+TAG_SIZE = 16
+# Prefix of the HKDF ``info`` value; the 4-byte big-endian slot index is appended.
+HKDF_INFO_PREFIX = b'stashfs/slot/'
+@dataclass(frozen=True)
+class KDFParams:
+    """Cost settings handed to Argon2id.
+    The field defaults are the production profile. ``KDFParams.fast()``
+    returns a much cheaper profile so the test suite stays within its
+    per-test time budget.
+    """
+    time_cost: int = 3
+    memory_cost: int = 64 * 1024  # 64 MiB
+    parallelism: int = 1
+    @classmethod
+    def fast(cls) -> KDFParams:
+        """Return the cheap preset used by tests."""
+        cheap = {'time_cost': 1, 'memory_cost': 8 * 1024, 'parallelism': 1}
+        return cls(**cheap)
+class KDF:
+    """Two-stage key derivation: password to master key, master key to slot key."""
+    def __init__(self, params: KDFParams | None = None) -> None:
+        self.params = params if params else KDFParams()
+    def master(self, password: bytes | str, salt: bytes) -> bytes:
+        """Derive the ``KEY_SIZE``-byte master key with Argon2id.
+        A ``str`` password is UTF-8 encoded first. The empty password is
+        accepted on purpose: callers use it for the slot-0 "no password"
+        volume.
+        """
+        secret = password.encode('utf-8') if isinstance(password, str) else password
+        cost = self.params
+        return hash_secret_raw(
+            secret=secret,
+            salt=salt,
+            time_cost=cost.time_cost,
+            memory_cost=cost.memory_cost,
+            parallelism=cost.parallelism,
+            hash_len=KEY_SIZE,
+            type=Type.ID,
+        )
+    @staticmethod
+    def derive_slot(master_key: bytes, slot_index: int, out_len: int = KEY_SIZE) -> bytes:
+        """Derive a per-slot key from ``master_key`` with HKDF-SHA256.
+        The slot index is packed into the HKDF ``info`` value, so two slots
+        never get the same derived key even when they share a master key.
+        """
+        slot_tag = struct.pack('>I', slot_index)
+        expander = HKDF(
+            algorithm=hashes.SHA256(),
+            length=out_len,
+            salt=None,
+            info=HKDF_INFO_PREFIX + slot_tag,
+        )
+        return expander.derive(master_key)
+class AEADChunk:
+    """AES-256-GCM sealing where each frame is ``nonce || ciphertext || tag``."""
+    NONCE_SIZE = NONCE_SIZE
+    TAG_SIZE = TAG_SIZE
+    def __init__(self, key: bytes) -> None:
+        key_len = len(key)
+        if key_len != KEY_SIZE:
+            raise ValueError(f'key must be {KEY_SIZE} bytes, got {key_len}')
+        self._aead = AESGCM(key)
+    def seal(self, plaintext: bytes, associated_data: bytes | None = None) -> bytes:
+        """Encrypt ``plaintext`` under a fresh random nonce.
+        Returns the nonce followed by the ciphertext and tag.
+        """
+        fresh_nonce = os.urandom(NONCE_SIZE)
+        return fresh_nonce + self._aead.encrypt(fresh_nonce, plaintext, associated_data)
+    def open(self, frame: bytes, associated_data: bytes | None = None) -> bytes | None:
+        """Decrypt a frame produced by ``seal``.
+        Returns the plaintext, or ``None`` when the frame is too short to
+        hold a nonce and a tag or when authentication fails.
+        """
+        if len(frame) >= NONCE_SIZE + TAG_SIZE:
+            nonce, sealed = frame[:NONCE_SIZE], frame[NONCE_SIZE:]
+            try:
+                return self._aead.decrypt(nonce, sealed, associated_data)
+            except InvalidTag:
+                return None
+        return None
+    @staticmethod
+    def frame_overhead() -> int:
+        """Number of bytes a sealed frame adds to the plaintext length."""
+        overhead = NONCE_SIZE + TAG_SIZE
+        return overhead

stashfs/file_index.py ADDED Viewed

@@ -0,0 +1,71 @@
+"""Serialisable per-volume file index.
+Replaces the byte-offset-based ``FileStructure`` from the legacy layout
+with a chunk-id-based one. Each ``VolumeFile`` records the ordered list
+of chunk ids that hold its plaintext data, plus the logical size so we
+know how much of the last chunk is live.
+Serialisation format (all integers big-endian)::
+    u32     num_files
+    repeat num_files times:
+        u32   name_length
+        bytes name (utf-8)
+        u64   size
+        u32   num_chunks
+        u64 * num_chunks   chunk_ids
+"""
+from __future__ import annotations
+import struct
+from dataclasses import dataclass, field
+@dataclass
+class VolumeFile:
+    """One file entry in a volume's file index."""
+    # File name; ``parse`` also uses it as the dict key for the entry.
+    name: str
+    # Logical size in bytes; tells how much of the last chunk is live.
+    size: int = 0
+    # Ordered ids of the chunks that hold the file's plaintext data.
+    chunk_ids: list[int] = field(default_factory=list)
+class FileIndexCorrupt(Exception):
+    """Raised by ``parse`` when a serialised file index cannot be unpacked."""
+    pass
+def serialize(files: dict[str, VolumeFile]) -> bytes:
+    """Encode ``files`` into the big-endian index format.
+    Output is a u32 file count followed by one record per file: u32 name
+    length, UTF-8 name, u64 size, u32 chunk count, then one u64 per chunk
+    id. Records are emitted in sorted key order.
+    """
+    parts = [struct.pack('>I', len(files))]
+    for key in sorted(files):
+        entry = files[key]
+        raw_name = entry.name.encode('utf-8')
+        parts.append(struct.pack('>I', len(raw_name)) + raw_name)
+        parts.append(struct.pack('>Q', entry.size))
+        parts.append(struct.pack('>I', len(entry.chunk_ids)))
+        parts.extend(struct.pack('>Q', chunk_id) for chunk_id in entry.chunk_ids)
+    return b''.join(parts)
+def parse(blob: bytes) -> dict[str, VolumeFile]:
+    """Decode a file index produced by ``serialize``.
+    Returns a dict mapping each file name to its ``VolumeFile``. Bytes
+    after the last record are ignored.
+    Raises ``FileIndexCorrupt`` when the blob is truncated or a file name
+    is not valid UTF-8.
+    """
+    try:
+        pos = 0
+        (num_files,) = struct.unpack_from('>I', blob, pos)
+        pos += 4
+        files: dict[str, VolumeFile] = {}
+        for _ in range(num_files):
+            (name_len,) = struct.unpack_from('>I', blob, pos)
+            pos += 4
+            raw_name = blob[pos : pos + name_len]
+            # Slicing past the end silently yields fewer bytes, so check
+            # the length before decoding a partial name.
+            if len(raw_name) != name_len:
+                raise FileIndexCorrupt(f'file name truncated: expected {name_len} bytes, got {len(raw_name)}')
+            name = raw_name.decode('utf-8')
+            pos += name_len
+            (size,) = struct.unpack_from('>Q', blob, pos)
+            pos += 8
+            (num_chunks,) = struct.unpack_from('>I', blob, pos)
+            pos += 4
+            chunk_ids = list(struct.unpack_from(f'>{num_chunks}Q', blob, pos))
+            pos += 8 * num_chunks
+            files[name] = VolumeFile(name=name, size=size, chunk_ids=chunk_ids)
+        return files
+    except (struct.error, UnicodeDecodeError) as e:
+        # A name that is not valid UTF-8 is corruption too; report it with
+        # the same exception type as a short or malformed buffer.
+        raise FileIndexCorrupt(str(e)) from e