PyPI - supertable - Version diff - supertable-2.3.5.tar.gz → supertable-2.3.6.tar.gz - Mend

supertable-2.3.5.tar.gz → supertable-2.3.6.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (186)

{supertable-2.3.5/supertable.egg-info → supertable-2.3.6}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: supertable
-Version: 2.3.5
+Version: 2.3.6
 Summary: SuperTable — versioned data lake library for SQL analytics on Parquet + Redis.
 Author: Levente Kupas
 Author-email: Levente Kupas <lkupas@kladnasoft.com>

{supertable-2.3.5 → supertable-2.3.6}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "supertable"
-version = "2.3.5"
+version = "2.3.6"
 description = "SuperTable — versioned data lake library for SQL analytics on Parquet + Redis."
 readme = "README.md"
 requires-python = ">=3.10"

{supertable-2.3.5 → supertable-2.3.6}/setup.py RENAMED Viewed

@@ -19,7 +19,7 @@ long_description = readme.read_text(encoding="utf-8") if readme.exists() else ""
 setup(
     name="supertable",
-    version="2.3.5",
+    version="2.3.6",
     description="SuperTable — versioned data lake library for SQL analytics on Parquet + Redis.",
     long_description=long_description,
     long_description_content_type="text/markdown",

{supertable-2.3.5 → supertable-2.3.6}/supertable/__init__.py RENAMED Viewed

@@ -25,7 +25,7 @@ See the ``supertable.demo`` package for runnable end-to-end demos and the
 project documentation for the full API surface.
 """
-__version__ = "2.3.5"
+__version__ = "2.3.6"
 # Re-export the core public surface so users can do ``from supertable import …``
 # instead of remembering submodule paths.

{supertable-2.3.5 → supertable-2.3.6}/supertable/data_writer.py RENAMED Viewed

@@ -5,6 +5,7 @@ import json
 import os
 import time
 import uuid
+from concurrent.futures import ThreadPoolExecutor
 from datetime import datetime, timezone
 import re
@@ -343,8 +344,18 @@ class DataWriter:
             # layout and tight row-group zonemaps). Together with __rowid__ it
             # is hidden from query output by the read view's
             # ``EXCLUDE (__rowid__, __timestamp__)`` projection.
+            #
+            # System-owned, exactly like __rowid__ above: ALWAYS overwrite any
+            # caller-supplied __timestamp__ instead of preserving it.  It is a
+            # reserved internal column that is both the dedup ORDER BY key (newest
+            # per key wins) and the source of the __p_year__/month/day partition
+            # derivation (processing.py); letting a caller inject an arbitrary value
+            # (wrong dtype, non-UTC, or chosen to game which row wins) would
+            # silently corrupt partitioning and dedup.  ``newer_than`` is the
+            # supported, explicit mechanism for caller-controlled conflict
+            # resolution.
             table_config = self._get_table_config(simple_name)
-            if not delete_only and "__timestamp__" not in dataframe.columns:
+            if not delete_only:
                 dataframe = dataframe.with_columns(
                     polars.lit(datetime.now(timezone.utc)).alias("__timestamp__")
                 )
@@ -511,12 +522,21 @@ class DataWriter:
                 # Load the current deletion-vector once: used both to exclude
                 # already-tombstoned rows from this write's deletes (below) and,
                 # via prev_df, to extend the vector without a second read.
+                # required=True: a DV that exists but cannot be read must abort
+                # the write, never be treated as empty — silently dropping the
+                # carried-forward vector would resurrect previously deleted rows.
                 prev_dv_df = (
-                    _read_parquet_safe(prev_tombstone_path, profiler=profiler)
+                    _read_parquet_safe(prev_tombstone_path, profiler=profiler, required=True)
                     if prev_tombstone_path else None
                 )
+                # The rowid set is consumed only by the idempotency filter below,
+                # which runs only when this write actually tombstones rows
+                # (overwrite or delete_only).  Pure appends tombstone nothing, so
+                # skip materialising the whole deletion-vector as a Python set —
+                # prev_dv_df is still carried forward into build_tombstone_file.
                 prev_dv_rowids = set()
-                if prev_dv_df is not None and "__rowid__" in prev_dv_df.columns:
+                if (overwrite_columns or delete_only) and prev_dv_df is not None \
+                        and "__rowid__" in prev_dv_df.columns:
                     prev_dv_rowids = set(prev_dv_df.get_column("__rowid__").to_list())
                 # 1. Identify which existing rows this write deletes/replaces.
@@ -555,38 +575,87 @@ class DataWriter:
                     f"(excluded {len(prev_dv_rowids)} row(s) already in the deletion-vector)"
                 ))
-                # 2. Write the incoming rows as a new file (insert/upsert side).
-                #    delete_only carries only predicate columns — nothing to insert.
-                if not delete_only and dataframe.height > 0:
+                # 2. + 3.  Write the incoming rows as a new data file (insert/
+                #    upsert side) AND carry-forward/extend the deletion-vector
+                #    tombstone file.  These two object-store PUTs are independent:
+                #    neither reads the other's output and they write to disjoint
+                #    dirs (data/ vs tombstone/), so they run concurrently to
+                #    overlap the two round-trips.  delete_only carries only
+                #    predicate columns → nothing to insert.  No new deletes →
+                #    build_tombstone reuses the previous file (combined_df=None).
+                #
+                #    Profiler is NOT thread-safe, so each branch records into its
+                #    own sub-profiler which the parent merges after the join;
+                #    each branch also measures its own wall time so the per-phase
+                #    monitoring timings stay meaningful despite the overlap.
+                #    Footers of files written via the write_bytes path are captured
+                #    in footer_md_cache so stats extraction (step 6) reuses them
+                #    instead of re-downloading each freshly-written file.
+                footer_md_cache = {}
+                tombstone_dir = os.path.join(simple_table.simple_dir, "tombstone")
+                do_insert = (not delete_only and dataframe.height > 0)
+                def _write_data_branch():
+                    sub = Profiler()
+                    t = time.perf_counter()
                     write_parquet_and_collect_resources(
                         write_df=dataframe,
                         overwrite_columns=[],
                         data_dir=simple_table.data_dir,
                         new_resources=new_resources,
                         compression_level=compression_level,
-                        profiler=profiler,
+                        profiler=sub,
+                        footer_md_out=footer_md_cache,
                     )
+                    return sub, time.perf_counter() - t
+                def _write_tombstone_branch():
+                    sub = Profiler()
+                    t = time.perf_counter()
+                    tp, cdf = build_tombstone_file(
+                        tombstone_dir=tombstone_dir,
+                        prev_tombstone_path=prev_tombstone_path,
+                        new_pairs=new_delete_pairs,
+                        compression_level=compression_level,
+                        profiler=sub,
+                        prev_df=prev_dv_df,
+                    )
+                    return tp, cdf, sub, time.perf_counter() - t
+                if do_insert:
+                    with ThreadPoolExecutor(max_workers=2) as _ex:
+                        _f_data = _ex.submit(_write_data_branch)
+                        _f_tomb = _ex.submit(_write_tombstone_branch)
+                        # .result() re-raises in the parent: a failure in either
+                        # PUT aborts the write before any snapshot commit, exactly
+                        # as the former sequential path did (an orphaned immutable
+                        # file no snapshot references is harmless garbage).
+                        data_sub, data_secs = _f_data.result()
+                        tombstone_path, combined_tombstone_df, tomb_sub, tomb_secs = (
+                            _f_tomb.result()
+                        )
+                    profiler.merge(data_sub)
+                    profiler.merge(tomb_sub)
                     inserted = dataframe.height
                 else:
+                    tombstone_path, combined_tombstone_df, tomb_sub, tomb_secs = (
+                        _write_tombstone_branch()
+                    )
+                    profiler.merge(tomb_sub)
+                    data_secs = 0.0
                     inserted = 0
-                mark("write_parquet")
+                # Assign the two per-phase timings from each branch's own measured
+                # wall time (they overlapped, so the serial mark() deltas would
+                # misattribute the time), then advance the mark() baseline.
+                timings["write_parquet"] = data_secs
+                timings["build_tombstone"] = tomb_secs
+                t_last = time.time()
                 logger.debug(lp(
                     f"step[write]: appended {inserted} incoming row(s) as {len(new_resources)} "
                     f"new immutable file(s) (no existing data file rewritten)"
                 ))
-                # 3. Carry forward + extend the deletion-vector tombstone file.
-                #    No new deletes → reuse the previous file (combined_df=None).
-                tombstone_dir = os.path.join(simple_table.simple_dir, "tombstone")
-                tombstone_path, combined_tombstone_df = build_tombstone_file(
-                    tombstone_dir=tombstone_dir,
-                    prev_tombstone_path=prev_tombstone_path,
-                    new_pairs=new_delete_pairs,
-                    compression_level=compression_level,
-                    profiler=profiler,
-                    prev_df=prev_dv_df,
-                )
                 # Track the live deletion-vector row count so meta reads can
                 # deduct dead rows from the physical resource row totals.
                 # New deletes → combined_tombstone_df is the full deduped DV
@@ -596,7 +665,6 @@ class DataWriter:
                     if combined_tombstone_df is not None
                     else int(last_simple_table.get("tombstone_rows", 0) or 0)
                 )
-                mark("build_tombstone")
                 logger.debug(lp(
                     f"step[tombstone]: deletion-vector now {tombstone_rows} row(s) "
                     f"({'rewritten' if combined_tombstone_df is not None else 'carried forward unchanged'})"
@@ -745,7 +813,9 @@ class DataWriter:
                     r.get("file") for r in new_resources
                     if isinstance(r, dict) and r.get("file")
                 ]
-                new_stats_rows = extract_stats_rows(new_data_files, profiler=profiler)
+                new_stats_rows = extract_stats_rows(
+                    new_data_files, profiler=profiler, footer_md_cache=footer_md_cache
+                )
                 stats_path, combined_stats_df = build_stats_file(
                     stats_dir=stats_dir,
                     prev_stats_path=last_simple_table.get("stats_file"),
@@ -1181,8 +1251,17 @@ class DataWriter:
             # the *write* path; compact() is explicit maintenance and always
             # consumes the vector.
             tombstone_path = last_simple_table.get("tombstone")
+            # required=True: a DV that exists but cannot be read must abort the
+            # compaction, never be treated as empty. A swallowed read here would
+            # set should_run_tombstones=False, skipping both Phase A and the
+            # pointer-clear below, so Phase B would carry the dead rows into the
+            # new file while the vector kept pointing at the sunset __file__ —
+            # leaving them permanently unreclaimable. Failing loud leaves the
+            # prior snapshot + vector intact for a retry, and matches the
+            # write-path carry-forward read (required=True) above.
             tombstone_df = (
-                _read_parquet_safe(tombstone_path) if tombstone_path else None
+                _read_parquet_safe(tombstone_path, required=True)
+                if tombstone_path else None
             )
             tombstone_rows = (
                 tombstone_df.height if tombstone_df is not None else 0
@@ -1246,6 +1325,24 @@ class DataWriter:
                 r for r in (list(tomb_new_resources) + list(small_new_resources))
                 if r.get("file") not in all_sunset
             ]
+            # ``all_new_resources`` is the full set of files written by THIS
+            # compaction; it feeds stats extraction, the schema model_df and the
+            # result metrics below, all of which need every new file.
+            #
+            # For ``simple_table.update`` it must NOT be reused verbatim, though:
+            # Phase A's outputs were already spliced into
+            # ``last_simple_table["resources"]`` (the in-memory baseline that
+            # ``update`` starts from) right after Phase A ran.  ``update`` does
+            # ``(baseline - sunset) + new_resources`` with no dedup, so any
+            # Phase-A output that Phase B did NOT consume (left un-sunset because
+            # it exceeded the ``small_only`` threshold, or its read failed) would
+            # be counted once from the baseline AND once from new_resources —
+            # i.e. the same file listed twice in the new snapshot.  Hand ``update``
+            # only Phase B's brand-new files, which are the only resources genuinely
+            # absent from that baseline.
+            update_new_resources = [
+                r for r in small_new_resources if r.get("file") not in all_sunset
+            ]
             result["files_compacted"] = considered
             result["new_resources"] = len(all_new_resources)
             result["sunset_files"] = len(all_sunset)
@@ -1338,7 +1435,7 @@ class DataWriter:
                 )
                 new_snapshot_dict, new_snapshot_path = simple_table.update(
-                    all_new_resources,
+                    update_new_resources,
                     all_sunset,
                     model_df,
                     last_snapshot=last_simple_table,

{supertable-2.3.5 → supertable-2.3.6}/supertable/engine/engine_common.py RENAMED Viewed

@@ -731,12 +731,66 @@ def new_duckdb_connection(
     purely local scans.
     """
     con = duckdb.connect()
-    init_connection(con, temp_dir=temp_dir, memory_limit=memory_limit)
-    if for_paths and any("://" in str(p) for p in for_paths):
+    try:
+        init_connection(con, temp_dir=temp_dir, memory_limit=memory_limit)
+        if for_paths and any("://" in str(p) for p in for_paths):
+            configure_httpfs_and_s3(con, for_paths)
+    except Exception:
+        # Don't leak the half-initialised connection if a pragma / httpfs load
+        # raises; re-raise so callers still fall back exactly as before.
+        con.close()
+        raise
+    return con
+# Thread-local pool for the write-side probe connection.  DuckDB connections are
+# NOT thread-safe, so each thread keeps its own; reusing it amortises the
+# ~150 ms init/warmup across writes on the same thread — the same reason the
+# read executors hold a persistent connection.
+_probe_pool = threading.local()
+def get_pooled_duckdb_connection(
+        temp_dir: str,
+        for_paths: Optional[List[str]] = None,
+        memory_limit: str = "1GB",
+) -> duckdb.DuckDBPyConnection:
+    """Return this thread's pooled probe connection, building it on first use.
+    The cold build goes through ``new_duckdb_connection`` so the pinned
+    ``home_directory`` / pragma contract is byte-for-byte identical to a
+    transient connection.  On a *warm* connection httpfs/S3 is re-applied for
+    remote paths so a connection first built for local paths can still serve a
+    later remote probe and credentials always reflect the current environment
+    (``configure_httpfs_and_s3`` re-reads env each call and is idempotent).
+    """
+    con = getattr(_probe_pool, "con", None)
+    if con is None:
+        con = new_duckdb_connection(
+            temp_dir=temp_dir, for_paths=for_paths, memory_limit=memory_limit
+        )
+        _probe_pool.con = con
+    elif for_paths and any("://" in str(p) for p in for_paths):
         configure_httpfs_and_s3(con, for_paths)
     return con
+def reset_pooled_duckdb_connections() -> None:
+    """Close and drop the calling thread's pooled probe connection.
+    A no-op when the thread has none.  Used for test determinism and as an
+    eviction hook; the pool slot is cleared before the close so a failing close
+    still leaves the thread ready to rebuild.
+    """
+    con = getattr(_probe_pool, "con", None)
+    if con is not None:
+        _probe_pool.con = None
+        try:
+            con.close()
+        except Exception:
+            pass
 def apply_runtime_pragmas(con: duckdb.DuckDBPyConnection, cfg) -> None:
     """Re-apply the session-settable DuckDB pragmas from a live engine config.

{supertable-2.3.5 → supertable-2.3.6}/supertable/engine/tests/conftest.py RENAMED Viewed

@@ -86,6 +86,21 @@ def _mock_redis_catalog():
         yield
+@pytest.fixture(autouse=True)
+def _reset_probe_pool():
+    """Clear the thread-local write-probe connection pool around every test.
+    The probe now reuses a pooled connection across writes, so tests that
+    assert how many times ``new_duckdb_connection`` is built must start from a
+    cold pool; resetting afterwards keeps the connection from leaking into the
+    next test.
+    """
+    from supertable.engine.engine_common import reset_pooled_duckdb_connections
+    reset_pooled_duckdb_connections()
+    yield
+    reset_pooled_duckdb_connections()
 @pytest.fixture()
 def duckdb_con():
     """Provide a real in-memory DuckDB connection, closed after each test."""

{supertable-2.3.5 → supertable-2.3.6}/supertable/engine/tests/test_engine.py RENAMED Viewed

@@ -587,6 +587,38 @@ class TestReadWriteDuckDBParity:
         # for_paths forwarded so httpfs is loaded for remote scans.
         assert "for_paths" in calls[0][1]
+    def test_probe_reuses_pooled_connection(self, tmp_path, monkeypatch):
+        # A second probe on the same thread must REUSE the pooled connection,
+        # so new_duckdb_connection is built exactly once — the ~150ms warmup is
+        # paid on the cold probe and amortised on every subsequent write.
+        import polars
+        from supertable import processing as _processing
+        monkeypatch.setattr(_processing, "_get_storage", lambda: object())
+        f1 = str(tmp_path / "f1.parquet")
+        polars.DataFrame({"__rowid__": [10, 20], "id": [1, 2]}).write_parquet(f1)
+        calls = []
+        real = _engine_common.new_duckdb_connection
+        monkeypatch.setattr(
+            _engine_common,
+            "new_duckdb_connection",
+            lambda *a, **k: (calls.append((a, k)), real(*a, **k))[1],
+        )
+        def _probe():
+            return _processing._duckdb_probe_overlap_matches(
+                overlap_true_files=[(f1, 0)],
+                overwrite_columns=["id"],
+                newer_than_col=None,
+                incoming_keys=polars.DataFrame({"id": [2]}),
+            )
+        assert _probe() is not None
+        assert _probe() is not None
+        assert len(calls) == 1  # built on the cold probe, reused on the warm one
     def test_probe_matches_rows_on_local_parquet(self, tmp_path, monkeypatch):
         import polars
         from supertable import processing as _processing

{supertable-2.3.5 → supertable-2.3.6}/supertable/processing.py RENAMED Viewed

@@ -212,12 +212,17 @@ def concat_many_with_union(frames: List[polars.DataFrame]) -> polars.DataFrame:
 # Safe storage I/O helpers
 # =========================
-def _safe_exists(path: str, profiler: Optional[Profiler] = None) -> bool:
+def _safe_exists(path: str, profiler: Optional[Profiler] = None, strict: bool = False) -> bool:
     p = profiler or get_null_profiler()
     try:
         with p.span("io.exists"):
             return _get_storage().exists(path)
     except Exception:
+        # A failed existence probe is normally treated as "absent" (lenient).
+        # *strict* callers (carry-forward reads) must not mistake a backend
+        # error for a genuine absence, so re-raise instead.
+        if strict:
+            raise
         return False
@@ -226,9 +231,21 @@ def _read_parquet_safe(
         profiler: Optional[Profiler] = None,
         file_size: int = 0,
         columns: Optional[List[str]] = None,
+        required: bool = False,
 ) -> Optional[polars.DataFrame]:
+    """Read a parquet object into polars, or ``None`` when it is absent.
+    When *required* is True a genuine read failure — the object exists but cannot
+    be read (corrupt body, transient/persistent backend error) — is re-raised
+    instead of being swallowed to ``None``.  Absence still returns ``None`` even
+    when required (a missing object, or one sunset by a concurrent writer, is a
+    legitimate "no previous artifact" signal).  Carry-forward callers that would
+    otherwise silently drop a still-referenced artifact — the deletion-vector —
+    must pass ``required=True`` so a failed read aborts the write rather than
+    persisting a truncated successor (which would resurrect deleted rows).
+    """
     p = profiler or get_null_profiler()
-    if not _safe_exists(path, profiler=p):
+    if not _safe_exists(path, profiler=p, strict=required):
         logging.info(f"[race] file already sunset by another writer: {path}")
         return None
     try:
@@ -251,6 +268,8 @@ def _read_parquet_safe(
         return None
     except Exception as e:
         logging.warning(f"[read] failed to read parquet at {path}: {e}")
+        if required:
+            raise
         return None
@@ -553,6 +572,7 @@ def compact_resources(
 def write_parquet_and_collect_resources(
         write_df, overwrite_columns, data_dir, new_resources, compression_level=10,
         profiler: Optional[Profiler] = None,
+        footer_md_out: Optional[Dict] = None,
 ):
     """Write a DataFrame as one or more Parquet files and append resource dicts.
@@ -593,7 +613,7 @@ def write_parquet_and_collect_resources(
         if has_nulls:
             null_df = partitioned.filter(null_mask).drop(["__p_year__", "__p_month__", "__p_day__"])
-            _write_single_parquet_file(null_df, overwrite_columns, data_dir, new_resources, compression_level, profiler=profiler)
+            _write_single_parquet_file(null_df, overwrite_columns, data_dir, new_resources, compression_level, profiler=profiler, footer_md_out=footer_md_out)
             partitioned = partitioned.filter(~null_mask)
         if partitioned.height > 0:
@@ -613,16 +633,17 @@ def write_parquet_and_collect_resources(
                 )
                 _write_single_parquet_file(
                     group_df, overwrite_columns, partition_dir, new_resources, compression_level,
-                    profiler=profiler,
+                    profiler=profiler, footer_md_out=footer_md_out,
                 )
     else:
         # --- Flat write path (no __timestamp__) — backward compatible ---
-        _write_single_parquet_file(write_df, overwrite_columns, data_dir, new_resources, compression_level, profiler=profiler)
+        _write_single_parquet_file(write_df, overwrite_columns, data_dir, new_resources, compression_level, profiler=profiler, footer_md_out=footer_md_out)
 def _write_single_parquet_file(
         write_df, overwrite_columns, target_dir, new_resources, compression_level=10,
         profiler: Optional[Profiler] = None,
+        footer_md_out: Optional[Dict] = None,
 ):
     """Write a single Parquet file into *target_dir* and append a resource entry.
@@ -676,6 +697,17 @@ def _write_single_parquet_file(
         if hasattr(_get_storage(), "write_bytes"):
             with p.span("write.upload_bytes"):
                 _get_storage().write_bytes(new_parquet_path, data)
+            # The uploaded bytes ARE ``data`` here, so parse the footer in memory
+            # (footer-only, no decode, no network round-trip) for stats reuse.
+            # ONLY on this path: the write_parquet / polars fallbacks below
+            # re-encode via a different writer, so their on-disk row-group layout
+            # and statistics need not match ``data`` — reusing it there could
+            # mis-prune row groups on read.
+            if footer_md_out is not None:
+                try:
+                    footer_md_out[new_parquet_path] = pq.read_metadata(io.BytesIO(data))
+                except Exception:
+                    pass
         elif hasattr(_get_storage(), "write_parquet"):
             with p.span("write.upload_parquet"):
                 _get_storage().write_parquet(arrow_tbl, new_parquet_path)
@@ -799,9 +831,14 @@ def filter_stale_incoming_rows(
             polars.col(newer_than_col).max().alias("__existing_max__")
         )
-    # Left join incoming against existing max
+    # Left join incoming against existing max.  nulls_equal=True so a NULL key
+    # compares against the existing NULL group's max, consistent with the
+    # null-safe delete semi-join — otherwise an older NULL-keyed row would skip
+    # the stale filter yet still tombstone the newer existing NULL-keyed row.
     with p.span("newer_than.join_filter"):
-        joined = incoming_df.join(existing_max, on=overwrite_columns, how="left")
+        joined = incoming_df.join(
+            existing_max, on=overwrite_columns, how="left", nulls_equal=True
+        )
         # Keep rows where:
         #   - no existing data for this key (null max → new key)
@@ -970,7 +1007,12 @@ def identify_all_rowids(
         if file_cache is not None and file in file_cache:
             existing_df = file_cache.get(file)
         else:
-            existing_df = _read_parquet_safe(file, profiler=p, file_size=file_size)
+            # Only __rowid__ is consumed below, so read just that column chunk.
+            # A delete-all can touch every file; a full-width read would pull all
+            # columns of every file into memory for nothing.
+            existing_df = _read_parquet_safe(
+                file, profiler=p, file_size=file_size, columns=[ROWID_COL]
+            )
         if existing_df is None or ROWID_COL not in existing_df.columns:
             continue
         rowids = existing_df.get_column(ROWID_COL).drop_nulls().to_list()
@@ -1072,7 +1114,7 @@ def _duckdb_probe_overlap_matches(
     try:
         import duckdb  # noqa: F401  (imported for availability check / errors)
         from supertable.engine.engine_common import (
-            new_duckdb_connection,
+            get_pooled_duckdb_connection,
             configure_httpfs_and_s3,
             escape_parquet_path,
             quote_if_needed,
@@ -1129,11 +1171,12 @@ def _duckdb_probe_overlap_matches(
     con = None
     try:
-        # Build the connection exactly like the read path (same pragmas, and a
-        # pinned home_directory) so the probe never falls back to the OS home —
-        # which is absent under a restricted service user.  httpfs/S3 is loaded
-        # by the helper only when duck_paths contain a remote URL.
-        con = new_duckdb_connection(temp_dir="write_probe", for_paths=duck_paths)
+        # Reuse this thread's pooled connection (cold-built exactly like the
+        # read path: same pragmas, pinned home_directory so the probe never
+        # falls back to the OS home, which is absent under a restricted service
+        # user).  The pool re-applies httpfs/S3 for remote paths, so a warm
+        # connection is configured for the current probe's object store.
+        con = get_pooled_duckdb_connection(temp_dir="write_probe", for_paths=duck_paths)
         con.register(ik_name, incoming_keys.to_arrow())
         try:
             matched = _run(duck_paths)
@@ -1157,14 +1200,13 @@ def _duckdb_probe_overlap_matches(
         return None
     finally:
         if con is not None:
+            # Return the connection to the thread-local pool (do NOT close it);
+            # only drop the per-probe registered relation so the uuid-named
+            # keys table can't accumulate across reuses.
             try:
                 con.unregister(ik_name)
             except Exception:
                 pass
-            try:
-                con.close()
-            except Exception:
-                pass
     if matched is None or "filename" not in matched.columns:
         return None
@@ -1240,7 +1282,11 @@ def _derive_stale_and_deletes(
                 polars.col(newer_than_col).max().alias("__existing_max__")
             )
         with p.span("newer_than.join_filter"):
-            joined = incoming_df.join(existing_max, on=overwrite_columns, how="left")
+            # nulls_equal=True keeps this consistent with the null-safe delete
+            # semi-join below and the polars fallback oracle.
+            joined = incoming_df.join(
+                existing_max, on=overwrite_columns, how="left", nulls_equal=True
+            )
             filtered = joined.filter(
                 polars.col("__existing_max__").is_null()
                 | (polars.col(newer_than_col) > polars.col("__existing_max__"))
@@ -1380,7 +1426,9 @@ def build_tombstone_file(
     )
     if prev_df is None and prev_tombstone_path:
-        prev_df = _read_parquet_safe(prev_tombstone_path, profiler=p)
+        # required=True: refuse to build a truncated deletion-vector if the
+        # previous one exists but cannot be read (would resurrect dead rows).
+        prev_df = _read_parquet_safe(prev_tombstone_path, profiler=p, required=True)
     if prev_df is not None and prev_df.height > 0 and ROWID_COL in prev_df.columns:
         combined = polars.concat(
             [prev_df.select([TOMBSTONE_FILE_COL, ROWID_COL]), new_df],
@@ -1658,6 +1706,7 @@ def _empty_stats_df() -> polars.DataFrame:
 def extract_stats_rows(
         file_paths: List[str],
         profiler: Optional[Profiler] = None,
+        footer_md_cache: Optional[Dict] = None,
 ) -> polars.DataFrame:
     """Read the footers of *file_paths* and return their stats rows.
@@ -1665,13 +1714,23 @@ def extract_stats_rows(
     ``__rowid__`` / ``__timestamp__`` columns.  Files whose footer cannot be
     read (race / corruption) are skipped.  Returns a frame with ``STATS_SCHEMA``
     (possibly empty).
+    *footer_md_cache* (optional) maps a file path to a parquet ``FileMetaData``
+    already parsed in memory at write time (from the exact bytes that were
+    uploaded).  When a path is present its footer is reused directly, skipping a
+    full-file re-download; otherwise the footer is read back from storage.
     """
     p = profiler or get_null_profiler()
+    cache = footer_md_cache or {}
     all_rows: List[dict] = []
     for path in file_paths:
         if not path:
             continue
-        md = _read_footer_metadata(path, profiler=p)
+        md = cache.get(path)
+        if md is None:
+            md = _read_footer_metadata(path, profiler=p)
+        else:
+            p.add("stats_footer_cache_hit", 1)
         if md is None:
             continue
         all_rows.extend(_stats_rows_for_metadata(path, md))
@@ -2244,7 +2303,18 @@ def compact_tombstones(
             # File already sunset by an earlier compaction — skip.
             continue
         file_size = int(resource.get("file_size") or 0)
-        existing_df = _read_parquet_safe(file_path, profiler=p, file_size=file_size)
+        # required=True: this is the ONLY physical drain (Phase B is row-preserving
+        # and never re-drops these rows), and the callers clear the deletion-vector
+        # pointer unconditionally once the vector was non-empty.  If a transient
+        # backend error here were swallowed to None, this file's tombstoned rows
+        # would be silently skipped yet the pointer cleared -> the rows RESURRECT on
+        # read.  Failing loud aborts the write/compact with the prior snapshot +
+        # vector intact for retry (matches the carry-forward DV-pointer reads).  A
+        # genuine absence still returns None (file already sunset/raced -> its rows
+        # are gone, so skipping it is correct).
+        existing_df = _read_parquet_safe(
+            file_path, profiler=p, file_size=file_size, required=True
+        )
         if existing_df is None or ROWID_COL not in existing_df.columns:
             continue

supertable-2.3.5.tar.gz → supertable-2.3.6.tar.gz

supertable-2.3.5.tar.gz → supertable-2.3.6.tar.gz