supertable 2.3.3__tar.gz → 2.3.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {supertable-2.3.3/supertable.egg-info → supertable-2.3.5}/PKG-INFO +1 -1
- {supertable-2.3.3 → supertable-2.3.5}/pyproject.toml +1 -1
- {supertable-2.3.3 → supertable-2.3.5}/setup.py +1 -1
- {supertable-2.3.3 → supertable-2.3.5}/supertable/__init__.py +1 -1
- {supertable-2.3.3 → supertable-2.3.5}/supertable/config/defaults.py +6 -0
- supertable-2.3.5/supertable/config/homedir.py +96 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/config/settings.py +21 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/data_classes.py +9 -1
- {supertable-2.3.3 → supertable-2.3.5}/supertable/data_reader.py +3 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/data_writer.py +122 -6
- {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/defaults.py +3 -1
- {supertable-2.3.3 → supertable-2.3.5}/supertable/engine/duckdb_lite.py +29 -1
- {supertable-2.3.3 → supertable-2.3.5}/supertable/engine/duckdb_pro.py +34 -1
- {supertable-2.3.3 → supertable-2.3.5}/supertable/engine/engine_common.py +349 -66
- {supertable-2.3.3 → supertable-2.3.5}/supertable/engine/tests/test_engine.py +548 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/processing.py +204 -32
- {supertable-2.3.3 → supertable-2.3.5}/supertable/simple_table.py +0 -9
- {supertable-2.3.3 → supertable-2.3.5}/supertable/storage/azure_storage.py +7 -2
- {supertable-2.3.3 → supertable-2.3.5}/supertable/storage/gcp_storage.py +7 -2
- {supertable-2.3.3 → supertable-2.3.5}/supertable/storage/local_storage.py +4 -4
- {supertable-2.3.3 → supertable-2.3.5}/supertable/storage/minio_storage.py +7 -2
- {supertable-2.3.3 → supertable-2.3.5}/supertable/storage/s3_storage.py +7 -2
- {supertable-2.3.3 → supertable-2.3.5}/supertable/storage/storage_interface.py +21 -2
- {supertable-2.3.3 → supertable-2.3.5}/supertable/super_table.py +0 -6
- {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/test_data_reader.py +5 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/test_data_writer_comprehensive.py +2 -1
- {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/test_processing_stats.py +3 -1
- {supertable-2.3.3 → supertable-2.3.5/supertable.egg-info}/PKG-INFO +1 -1
- supertable-2.3.3/supertable/config/homedir.py +0 -62
- {supertable-2.3.3 → supertable-2.3.5}/LICENSE +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/README.md +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/requirements.txt +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/setup.cfg +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/audit/__init__.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/audit/admin.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/audit/chain.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/audit/consumers.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/audit/crypto.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/audit/events.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/audit/export.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/audit/logger.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/audit/middleware.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/audit/reader.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/audit/retention.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/audit/tests/__init__.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/audit/tests/test_chain.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/audit/tests/test_crypto.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/audit/tests/test_emit.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/audit/tests/test_events.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/audit/tests/test_retention.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/audit/writer_parquet.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/audit/writer_redis.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/config/__init__.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/config/tests/__init__.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/config/tests/test_defaults.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/config/tests/test_homedir.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/config/tests/test_settings.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/__init__.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/__init__.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/__main__.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/check_filter_builder.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/controller.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/data_writer_helpers.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/dummy_data.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/read_parquet_header.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/s01_01_01_create_super_table.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/s01_01_02_enable_mirroring_formats.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/s01_02_create_roles.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/s01_03_create_users.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/s02_01_write_dummy_data.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/s02_02_write_single_data.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/s02_03_01_write_staging.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/s02_03_02_create_pipe.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/s02_04_01_write_monitoring_simple.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/s02_04_02_write_monitoring_parallel.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/s02_05_write_tombstone.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/s03_01_read_data_error.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/s03_02_01_read_super_data_ok.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/s03_02_02_read_table_data_ok.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/s03_03_read_meta.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/s03_04_read_staging.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/s03_06_01_read_roles.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/s03_06_02_read_user.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/s03_07_01_estimate_read.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/s03_07_02_estimate_files.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/s03_08_read_snapshot_history.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/s04_01_03_delete_pipe.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/s05_01_delete_table.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/s05_02_delete_super_table.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/webshop/__init__.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/webshop/core.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/webshop/defaults.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/webshop/generate.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/webshop/load.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/webshop/topup.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/engine/__init__.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/engine/data_estimator.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/engine/engine_config.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/engine/engine_enum.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/engine/executor.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/engine/plan_stats.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/engine/spark_thrift.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/engine/tests/__init__.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/engine/tests/conftest.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/engine/tests/test_engine_config.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/engine/tests/test_engine_routing.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/engine/tests/test_engine_spill.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/errors.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/locking/__init__.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/locking/benchmarks/__init__.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/locking/benchmarks/benchmark_locking.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/locking/benchmarks/measure_lock_speed.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/locking/benchmarks/measure_lock_time.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/locking/file_lock.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/locking/redis_lock.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/locking/tests/__init__.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/locking/tests/test_file_lock.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/locking/tests/test_redis_lock.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/logging.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/meta_reader.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/mirroring/__init__.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/mirroring/mirror_delta.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/mirroring/mirror_formats.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/mirroring/mirror_iceberg.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/mirroring/mirror_parquet.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/monitoring/__init__.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/monitoring/partitions.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/monitoring_writer.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/plan_extender.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/query_plan_manager.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/rbac/__init__.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/rbac/access_control.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/rbac/filter_builder.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/rbac/permissions.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/rbac/role_manager.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/rbac/row_column_security.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/rbac/tests/test_filter_builder.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/rbac/tests/test_rbac.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/rbac/tests/test_rbac_per_table.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/rbac/user_manager.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/redis_catalog.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/redis_connector.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/redis_infra.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/redis_keys.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/staging_area.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/storage/__init__.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/storage/storage_factory.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/storage/tests/test_storage.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/super_pipe.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/system_query.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/__init__.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/test_align_to_schema_fix.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/test_create_if_missing.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/test_data_reader_preflight.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/test_data_writer.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/test_data_writer_compact.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/test_errors.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/test_meta_reader.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/test_monitoring_partitions.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/test_monitoring_sink_guard.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/test_newer_than.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/test_parquet_statistics.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/test_processing.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/test_processing_compact_resources.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/test_query_sql.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/test_read_pruning_differential.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/test_read_pruning_integration.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/test_redis_key_prefix.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/test_resolve_overwrite_writes.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/test_simple_table.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/test_stats_cache.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/test_stats_pruning.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/test_stats_schema_snapshot.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/test_super_table.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/test_supertable_all.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/test_system_query.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/utils/__init__.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/utils/helper.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/utils/profiler.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/utils/sql_parser.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/utils/tests/test_sql_parser_columns.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable/utils/timer.py +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable.egg-info/SOURCES.txt +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable.egg-info/dependency_links.txt +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable.egg-info/entry_points.txt +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable.egg-info/requires.txt +0 -0
- {supertable-2.3.3 → supertable-2.3.5}/supertable.egg-info/top_level.txt +0 -0
|
@@ -19,7 +19,7 @@ long_description = readme.read_text(encoding="utf-8") if readme.exists() else ""
|
|
|
19
19
|
|
|
20
20
|
setup(
|
|
21
21
|
name="supertable",
|
|
22
|
-
version="2.3.3",
|
|
22
|
+
version="2.3.5",
|
|
23
23
|
description="SuperTable — versioned data lake library for SQL analytics on Parquet + Redis.",
|
|
24
24
|
long_description=long_description,
|
|
25
25
|
long_description_content_type="text/markdown",
|
|
@@ -25,7 +25,7 @@ See the ``supertable.demo`` package for runnable end-to-end demos and the
|
|
|
25
25
|
project documentation for the full API surface.
|
|
26
26
|
"""
|
|
27
27
|
|
|
28
|
-
__version__ = "2.3.3"
|
|
28
|
+
__version__ = "2.3.5"
|
|
29
29
|
|
|
30
30
|
# Re-export the core public surface so users can do ``from supertable import …``
|
|
31
31
|
# instead of remembering submodule paths.
|
|
@@ -17,6 +17,12 @@ handler.setFormatter(colorlog.ColoredFormatter(
|
|
|
17
17
|
logging.basicConfig(level=logging.INFO, handlers=[handler])
|
|
18
18
|
logger = logging.getLogger(__name__)
|
|
19
19
|
|
|
20
|
+
# Quiet noisy third-party HTTP client loggers. At DEBUG these emit one line
|
|
21
|
+
# per request (connection setup + every HEAD/GET/PUT), which drowns out
|
|
22
|
+
# SuperTable's own logs. WARNING keeps genuine connection problems visible.
|
|
23
|
+
for _noisy_logger in ("urllib3", "botocore", "boto3", "s3transfer", "boto"):
|
|
24
|
+
logging.getLogger(_noisy_logger).setLevel(logging.WARNING)
|
|
25
|
+
|
|
20
26
|
_VALID_LOG_LEVELS = frozenset({"DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"})
|
|
21
27
|
|
|
22
28
|
@dataclass(slots=True)
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import sys
|
|
3
|
+
import tempfile
|
|
4
|
+
|
|
5
|
+
from supertable.config.settings import settings
|
|
6
|
+
from supertable.config.defaults import logger
|
|
7
|
+
|
|
8
|
+
# If this file is located in a subdirectory, adjust the path logic as needed.
|
|
9
|
+
# Currently appending ".." from __file__ to add the project root directory
|
|
10
|
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
|
11
|
+
|
|
12
|
+
# ---------- lazy home directory resolution ----------
|
|
13
|
+
_resolved_home: str | None = None
|
|
14
|
+
|
|
15
|
+
def _is_writable_dir(path: str) -> bool:
|
|
16
|
+
"""Create *path* if needed and verify we can actually write a file in it.
|
|
17
|
+
|
|
18
|
+
``os.access(..., W_OK)`` is unreliable under containers, ACLs and
|
|
19
|
+
root-squashed mounts, so probe with a real create+unlink: this is the
|
|
20
|
+
difference between a home that merely *resolves* and one DuckDB can root
|
|
21
|
+
its temp/spill, cache and extension dirs under.
|
|
22
|
+
"""
|
|
23
|
+
try:
|
|
24
|
+
os.makedirs(path, exist_ok=True)
|
|
25
|
+
with tempfile.NamedTemporaryFile(dir=path):
|
|
26
|
+
pass
|
|
27
|
+
return True
|
|
28
|
+
except OSError:
|
|
29
|
+
return False
|
|
30
|
+
|
|
31
|
+
def _resolve_app_home() -> str:
|
|
32
|
+
"""
|
|
33
|
+
Resolve, expand, and normalise the application home directory once.
|
|
34
|
+
|
|
35
|
+
The home must be *writable*, not just resolvable: DuckDB roots its
|
|
36
|
+
temp/spill, external-cache and extension directories here, so a
|
|
37
|
+
non-writable home silently breaks every write (the probe fails with
|
|
38
|
+
``errno 13`` and falls back to the slow full-read path). We therefore
|
|
39
|
+
verify writability and, when the configured home is not usable, fall back
|
|
40
|
+
to ``<tempdir>/supertable`` with a loud warning rather than returning a
|
|
41
|
+
path that only looks valid.
|
|
42
|
+
"""
|
|
43
|
+
global _resolved_home
|
|
44
|
+
if _resolved_home is not None:
|
|
45
|
+
return _resolved_home
|
|
46
|
+
|
|
47
|
+
raw = settings.SUPERTABLE_HOME
|
|
48
|
+
expanded = os.path.abspath(os.path.expanduser(raw))
|
|
49
|
+
|
|
50
|
+
if _is_writable_dir(expanded):
|
|
51
|
+
logger.debug(f"Ensured app home directory exists: {expanded}")
|
|
52
|
+
_resolved_home = expanded
|
|
53
|
+
return _resolved_home
|
|
54
|
+
|
|
55
|
+
fallback = os.path.join(tempfile.gettempdir(), "supertable")
|
|
56
|
+
if _is_writable_dir(fallback):
|
|
57
|
+
logger.warning(
|
|
58
|
+
f"SUPERTABLE_HOME={expanded!r} is not writable; falling back to "
|
|
59
|
+
f"{fallback!r}. Set SUPERTABLE_HOME to a writable directory to "
|
|
60
|
+
f"silence this — DuckDB temp/spill, cache and extensions live under it."
|
|
61
|
+
)
|
|
62
|
+
_resolved_home = fallback
|
|
63
|
+
return _resolved_home
|
|
64
|
+
|
|
65
|
+
raise RuntimeError(
|
|
66
|
+
f"No writable application home: tried SUPERTABLE_HOME={expanded!r} and "
|
|
67
|
+
f"fallback {fallback!r}. Set SUPERTABLE_HOME to a writable directory."
|
|
68
|
+
)
|
|
69
|
+
|
|
70
|
+
def change_to_app_home(home_dir: str | None = None) -> None:
|
|
71
|
+
"""
|
|
72
|
+
Attempts to change the current working directory to `home_dir`.
|
|
73
|
+
If home_dir is not provided, uses the resolved app home.
|
|
74
|
+
Logs the outcome.
|
|
75
|
+
"""
|
|
76
|
+
target = home_dir if home_dir else _resolve_app_home()
|
|
77
|
+
expanded_dir = os.path.expanduser(target)
|
|
78
|
+
try:
|
|
79
|
+
os.chdir(expanded_dir)
|
|
80
|
+
logger.debug(f"Changed working directory to {expanded_dir}")
|
|
81
|
+
except Exception as e:
|
|
82
|
+
logger.error(f"Failed to change working directory to {expanded_dir}: {e}")
|
|
83
|
+
|
|
84
|
+
# ---------- eager init (preserves original import-time behaviour) ----------
|
|
85
|
+
_app_home = _resolve_app_home()
|
|
86
|
+
change_to_app_home(_app_home)
|
|
87
|
+
logger.debug(f"Current working directory: {os.getcwd()}")
|
|
88
|
+
|
|
89
|
+
# ---------- public API ----------
|
|
90
|
+
|
|
91
|
+
# Kept for backward compatibility; prefer get_app_home() for the expanded path.
|
|
92
|
+
app_home = _app_home
|
|
93
|
+
|
|
94
|
+
def get_app_home() -> str:
|
|
95
|
+
"""Return the fully expanded, absolute application home directory."""
|
|
96
|
+
return _resolve_app_home()
|
|
@@ -157,6 +157,25 @@ class Settings:
|
|
|
157
157
|
SUPERTABLE_DUCKDB_MATERIALIZE: str = "view" # SUPERTABLE_DUCKDB_MATERIALIZE
|
|
158
158
|
SUPERTABLE_DUCKDB_PRESIGNED: bool = False # SUPERTABLE_DUCKDB_PRESIGNED
|
|
159
159
|
SUPERTABLE_DUCKDB_USE_HTTPFS: bool = False # SUPERTABLE_DUCKDB_USE_HTTPFS
|
|
160
|
+
# Deletion-vector (tombstone) table cache. Each entry is a small
|
|
161
|
+
# `DISTINCT __rowid__` table keyed by the stable tombstone path; the
|
|
162
|
+
# tombstone view ANTI JOINs it instead of re-reading the parquet every
|
|
163
|
+
# query. Eviction is purely per-table — a churny table can never evict a
|
|
164
|
+
# slow table's cached deletion-vector:
|
|
165
|
+
# * Idle TTL (below): every entry, including a table's latest, is dropped
|
|
166
|
+
# once it goes unqueried for the TTL window.
|
|
167
|
+
# * Per-table cap (this knob): at most N most-recently-used versions are
|
|
168
|
+
# kept per table, so a burst of rewrites (e.g. 1000 updates in 5 min)
|
|
169
|
+
# retains only the last N rather than all of them.
|
|
170
|
+
# <= 0 disables the cache entirely (inline read_parquet fallback).
|
|
171
|
+
SUPERTABLE_DUCKDB_TOMBSTONE_CACHE_MAX_PER_TABLE: int = 8 # SUPERTABLE_DUCKDB_TOMBSTONE_CACHE_MAX_PER_TABLE
|
|
172
|
+
# Idle TTL (seconds): a cached deletion-vector is dropped once it has gone
|
|
173
|
+
# unqueried for this long; every query that uses it refreshes the timer.
|
|
174
|
+
# Applies to every entry (a table's latest included), so an abandoned table
|
|
175
|
+
# reclaims its cache instead of lingering until the connection resets.
|
|
176
|
+
# <= 0 keeps an entry only while a query references it (no persistence).
|
|
177
|
+
# Defaults to SUPERTABLE_ENGINE_FRESHNESS_SEC (300 s / 5 min).
|
|
178
|
+
SUPERTABLE_DUCKDB_TOMBSTONE_CACHE_TTL_SEC: int = 300 # SUPERTABLE_DUCKDB_TOMBSTONE_CACHE_TTL_SEC
|
|
160
179
|
SUPERTABLE_DEBUG_TIMINGS: bool = False # SUPERTABLE_DEBUG_TIMINGS
|
|
161
180
|
|
|
162
181
|
# ── Engine Routing / Executor ────────────────────────────────────
|
|
@@ -418,6 +437,8 @@ def _build_settings() -> Settings:
|
|
|
418
437
|
SUPERTABLE_DUCKDB_MATERIALIZE=_env_str("SUPERTABLE_DUCKDB_MATERIALIZE", "view"),
|
|
419
438
|
SUPERTABLE_DUCKDB_PRESIGNED=_env_bool("SUPERTABLE_DUCKDB_PRESIGNED", False),
|
|
420
439
|
SUPERTABLE_DUCKDB_USE_HTTPFS=_env_bool("SUPERTABLE_DUCKDB_USE_HTTPFS", False),
|
|
440
|
+
SUPERTABLE_DUCKDB_TOMBSTONE_CACHE_MAX_PER_TABLE=_env_int("SUPERTABLE_DUCKDB_TOMBSTONE_CACHE_MAX_PER_TABLE", 8),
|
|
441
|
+
SUPERTABLE_DUCKDB_TOMBSTONE_CACHE_TTL_SEC=_env_int("SUPERTABLE_DUCKDB_TOMBSTONE_CACHE_TTL_SEC", 300),
|
|
421
442
|
SUPERTABLE_DEBUG_TIMINGS=_env_bool("SUPERTABLE_DEBUG_TIMINGS", False),
|
|
422
443
|
|
|
423
444
|
# ── Engine Routing ───────────────────────────────────────────
|
|
@@ -61,9 +61,17 @@ class TombstoneDef:
|
|
|
61
61
|
|
|
62
62
|
- tombstone_path: storage path of the deletion-vector parquet
|
|
63
63
|
(columns ``__file__`` + ``__rowid__``). ``None`` means no
|
|
64
|
-
tombstone exists, so no anti-join is applied.
|
|
64
|
+
tombstone exists, so no anti-join is applied. This may be a
|
|
65
|
+
presigned/object-store URL that rotates per request, so it is
|
|
66
|
+
*not* stable enough to use as a cache key.
|
|
67
|
+
- cache_key: the bare, stable storage key of the same deletion-vector
|
|
68
|
+
parquet (no presign). Stable across pure appends (carry-forward
|
|
69
|
+
returns the previous tombstone), so DuckDB engines use it to key the
|
|
70
|
+
materialised deletion-vector table cache. ``None`` disables caching
|
|
71
|
+
for this alias (falls back to inline ``read_parquet``).
|
|
65
72
|
"""
|
|
66
73
|
tombstone_path: Optional[str] = None
|
|
74
|
+
cache_key: Optional[str] = None
|
|
67
75
|
|
|
68
76
|
|
|
69
77
|
@dataclass
|
|
@@ -322,6 +322,9 @@ class DataReader:
|
|
|
322
322
|
# storage returns the key unchanged.
|
|
323
323
|
reflection.tombstone_views[td.alias] = TombstoneDef(
|
|
324
324
|
tombstone_path=estimator._to_duckdb_path(tomb_path),
|
|
325
|
+
# Bare key (pre-presign) is stable across
|
|
326
|
+
# appends → safe deletion-vector cache key.
|
|
327
|
+
cache_key=tomb_path,
|
|
325
328
|
)
|
|
326
329
|
except Exception as te:
|
|
327
330
|
logger.debug(self._lp(f"[tombstone] leaf lookup failed for {td.alias}: {te}"))
|
|
@@ -26,6 +26,7 @@ from supertable.processing import (
|
|
|
26
26
|
resolve_overwrite_writes,
|
|
27
27
|
identify_all_rowids,
|
|
28
28
|
build_tombstone_file,
|
|
29
|
+
reclaim_fully_dead_files,
|
|
29
30
|
build_stats_file,
|
|
30
31
|
extract_stats_rows,
|
|
31
32
|
probe_ranges_from_df,
|
|
@@ -370,6 +371,14 @@ class DataWriter:
|
|
|
370
371
|
profiler=profiler,
|
|
371
372
|
)
|
|
372
373
|
mark("overlap")
|
|
374
|
+
if overwrite_columns:
|
|
375
|
+
_snap_files = len(last_simple_table.get("resources") or [])
|
|
376
|
+
_cand = sum(1 for _, ov, _ in overlapping_files if ov)
|
|
377
|
+
logger.debug(lp(
|
|
378
|
+
f"step[overlap]: {_cand}/{_snap_files} existing file(s) are overwrite "
|
|
379
|
+
f"candidates on {overwrite_columns} "
|
|
380
|
+
f"(snapshot has no per-file key stats → every file is suspect)"
|
|
381
|
+
))
|
|
373
382
|
|
|
374
383
|
# --- Stats-driven file pruning (consumer 5a) ----------------------
|
|
375
384
|
# Narrow the overwrite/delete candidate set using the external stats
|
|
@@ -386,6 +395,10 @@ class DataWriter:
|
|
|
386
395
|
stored_stats_df = load_stats(stats_file, allow_cache=True, profiler=profiler)
|
|
387
396
|
if stored_stats_df is not None and stored_stats_df.height > 0:
|
|
388
397
|
probe = probe_ranges_from_df(dataframe, overwrite_columns)
|
|
398
|
+
_probe_desc = {
|
|
399
|
+
c: (f"{v[0]}[{v[1]}..{v[2]}]" if v else "unconstrained(null/unsupported)")
|
|
400
|
+
for c, v in probe.items()
|
|
401
|
+
}
|
|
389
402
|
before = len(overlapping_files)
|
|
390
403
|
overlapping_files = prune_overlapping_files_by_stats(
|
|
391
404
|
overlapping_files,
|
|
@@ -394,8 +407,21 @@ class DataWriter:
|
|
|
394
407
|
profiler=profiler,
|
|
395
408
|
)
|
|
396
409
|
pruned = before - len(overlapping_files)
|
|
410
|
+
logger.debug(lp(
|
|
411
|
+
f"step[stats-prune]: df-probe {_probe_desc} vs {stored_stats_df.height} "
|
|
412
|
+
f"stored stat row(s) → kept {len(overlapping_files)}/{before}, "
|
|
413
|
+
f"pruned {pruned} (no data file opened)"
|
|
414
|
+
))
|
|
397
415
|
if pruned > 0:
|
|
398
416
|
logger.info(lp(f"stats pruning: skipped {pruned}/{before} candidate files"))
|
|
417
|
+
else:
|
|
418
|
+
logger.debug(lp(
|
|
419
|
+
"step[stats-prune]: stats artifact empty → no pruning, all candidates retained"
|
|
420
|
+
))
|
|
421
|
+
else:
|
|
422
|
+
logger.debug(lp(
|
|
423
|
+
"step[stats-prune]: snapshot has no stats_file → no pruning, all candidates retained"
|
|
424
|
+
))
|
|
399
425
|
mark("stats_prune")
|
|
400
426
|
|
|
401
427
|
# File cache: used only by delete_only's identify_all_rowids below.
|
|
@@ -419,6 +445,16 @@ class DataWriter:
|
|
|
419
445
|
newer_than_col=newer_than,
|
|
420
446
|
profiler=profiler,
|
|
421
447
|
)
|
|
448
|
+
mark("resolve_overwrite")
|
|
449
|
+
_counts = profiler.counts
|
|
450
|
+
_fallback = bool(_counts.get("overwrite_resolve_fallback"))
|
|
451
|
+
logger.debug(lp(
|
|
452
|
+
f"step[probe-resolve] via {'polars-fallback' if _fallback else 'duckdb-pushdown'}: "
|
|
453
|
+
f"matched {_counts.get('probe_rows_matched', _counts.get('delete_rows_matched', 0))} "
|
|
454
|
+
f"existing row(s) on {overwrite_columns} → "
|
|
455
|
+
f"{len(resolved_delete_pairs or [])} (file,__rowid__) delete pair(s); "
|
|
456
|
+
f"{dataframe.height}/{pre_filter_count} incoming row(s) survive"
|
|
457
|
+
))
|
|
422
458
|
if newer_than:
|
|
423
459
|
skipped = pre_filter_count - dataframe.height
|
|
424
460
|
if skipped > 0:
|
|
@@ -472,6 +508,17 @@ class DataWriter:
|
|
|
472
508
|
new_resources = []
|
|
473
509
|
sunset_files = set()
|
|
474
510
|
|
|
511
|
+
# Load the current deletion-vector once: used both to exclude
|
|
512
|
+
# already-tombstoned rows from this write's deletes (below) and,
|
|
513
|
+
# via prev_df, to extend the vector without a second read.
|
|
514
|
+
prev_dv_df = (
|
|
515
|
+
_read_parquet_safe(prev_tombstone_path, profiler=profiler)
|
|
516
|
+
if prev_tombstone_path else None
|
|
517
|
+
)
|
|
518
|
+
prev_dv_rowids = set()
|
|
519
|
+
if prev_dv_df is not None and "__rowid__" in prev_dv_df.columns:
|
|
520
|
+
prev_dv_rowids = set(prev_dv_df.get_column("__rowid__").to_list())
|
|
521
|
+
|
|
475
522
|
# 1. Identify which existing rows this write deletes/replaces.
|
|
476
523
|
# overwrite_columns drives the anti-join key (delete + upsert);
|
|
477
524
|
# pure appends (no overwrite_columns) tombstone nothing. The
|
|
@@ -487,8 +534,26 @@ class DataWriter:
|
|
|
487
534
|
file_cache=file_cache,
|
|
488
535
|
profiler=profiler,
|
|
489
536
|
)
|
|
537
|
+
|
|
538
|
+
# Never re-tombstone rows already in the deletion-vector. The
|
|
539
|
+
# overlap probe (and identify_all_rowids) scan the *physical*
|
|
540
|
+
# files, which still hold logically-deleted rows until
|
|
541
|
+
# compaction; without this filter every write re-counts those
|
|
542
|
+
# already-dead rows — inflating ``deleted`` and forcing a
|
|
543
|
+
# needless tombstone rewrite even when nothing live was removed.
|
|
544
|
+
# Excluding them makes ``deleted`` the true count of live rows
|
|
545
|
+
# removed and lets unchanged writes carry the vector forward.
|
|
546
|
+
if new_delete_pairs and prev_dv_rowids:
|
|
547
|
+
new_delete_pairs = [
|
|
548
|
+
(f, rid) for (f, rid) in new_delete_pairs
|
|
549
|
+
if rid not in prev_dv_rowids
|
|
550
|
+
]
|
|
490
551
|
deleted = len(new_delete_pairs)
|
|
491
552
|
mark("identify_deletes")
|
|
553
|
+
logger.debug(lp(
|
|
554
|
+
f"step[deletes]: tombstoning {deleted} live row(s) this write "
|
|
555
|
+
f"(excluded {len(prev_dv_rowids)} row(s) already in the deletion-vector)"
|
|
556
|
+
))
|
|
492
557
|
|
|
493
558
|
# 2. Write the incoming rows as a new file (insert/upsert side).
|
|
494
559
|
# delete_only carries only predicate columns — nothing to insert.
|
|
@@ -505,6 +570,10 @@ class DataWriter:
|
|
|
505
570
|
else:
|
|
506
571
|
inserted = 0
|
|
507
572
|
mark("write_parquet")
|
|
573
|
+
logger.debug(lp(
|
|
574
|
+
f"step[write]: appended {inserted} incoming row(s) as {len(new_resources)} "
|
|
575
|
+
f"new immutable file(s) (no existing data file rewritten)"
|
|
576
|
+
))
|
|
508
577
|
|
|
509
578
|
# 3. Carry forward + extend the deletion-vector tombstone file.
|
|
510
579
|
# No new deletes → reuse the previous file (combined_df=None).
|
|
@@ -515,6 +584,7 @@ class DataWriter:
|
|
|
515
584
|
new_pairs=new_delete_pairs,
|
|
516
585
|
compression_level=compression_level,
|
|
517
586
|
profiler=profiler,
|
|
587
|
+
prev_df=prev_dv_df,
|
|
518
588
|
)
|
|
519
589
|
|
|
520
590
|
# Track the live deletion-vector row count so meta reads can
|
|
@@ -527,6 +597,42 @@ class DataWriter:
|
|
|
527
597
|
else int(last_simple_table.get("tombstone_rows", 0) or 0)
|
|
528
598
|
)
|
|
529
599
|
mark("build_tombstone")
|
|
600
|
+
logger.debug(lp(
|
|
601
|
+
f"step[tombstone]: deletion-vector now {tombstone_rows} row(s) "
|
|
602
|
+
f"({'rewritten' if combined_tombstone_df is not None else 'carried forward unchanged'})"
|
|
603
|
+
))
|
|
604
|
+
|
|
605
|
+
# 3b. Eager reclamation of fully-dead files. Any existing data
|
|
606
|
+
# file whose every physical row is now tombstoned is 100%
|
|
607
|
+
# dead: drop it from the snapshot for free (no rewrite) and
|
|
608
|
+
# remove its rowids from the vector. Without this, fully
|
|
609
|
+
# deleted files linger until the compaction threshold,
|
|
610
|
+
# bloating the snapshot and getting re-scanned by every later
|
|
611
|
+
# overwrite probe. Only runs when the vector changed this
|
|
612
|
+
# write (combined_tombstone_df is not None) — a carry-forward
|
|
613
|
+
# can create no newly-dead file.
|
|
614
|
+
if combined_tombstone_df is not None:
|
|
615
|
+
reclaimed_files, reclaimed_tomb_path, reclaimed_dv = (
|
|
616
|
+
reclaim_fully_dead_files(
|
|
617
|
+
resources=last_simple_table.get("resources") or [],
|
|
618
|
+
combined_dv=combined_tombstone_df,
|
|
619
|
+
tombstone_dir=tombstone_dir,
|
|
620
|
+
compression_level=compression_level,
|
|
621
|
+
profiler=profiler,
|
|
622
|
+
)
|
|
623
|
+
)
|
|
624
|
+
if reclaimed_files:
|
|
625
|
+
sunset_files |= reclaimed_files
|
|
626
|
+
tombstone_path = reclaimed_tomb_path
|
|
627
|
+
combined_tombstone_df = reclaimed_dv
|
|
628
|
+
tombstone_rows = (
|
|
629
|
+
reclaimed_dv.height if reclaimed_dv is not None else 0
|
|
630
|
+
)
|
|
631
|
+
logger.info(lp(
|
|
632
|
+
f"reclaimed {len(reclaimed_files)} fully-deleted "
|
|
633
|
+
f"file(s); deletion-vector now {tombstone_rows} rows"
|
|
634
|
+
))
|
|
635
|
+
mark("reclaim_dead_files")
|
|
530
636
|
|
|
531
637
|
# 4. Threshold compaction (two triggers, same physical step):
|
|
532
638
|
# (a) the deletion-vector grew past max_tombstone_rows, or
|
|
@@ -538,7 +644,9 @@ class DataWriter:
|
|
|
538
644
|
# rows (hidden on read, never reclaimable). Draining first
|
|
539
645
|
# guarantees Phase B only ever sees vector-free survivors.
|
|
540
646
|
post_write_resources = (
|
|
541
|
-
(last_simple_table.get("resources") or [])
|
|
647
|
+
[r for r in (last_simple_table.get("resources") or [])
|
|
648
|
+
if r.get("file") not in sunset_files]
|
|
649
|
+
+ new_resources
|
|
542
650
|
)
|
|
543
651
|
compaction_gate = should_compact_small_files(
|
|
544
652
|
post_write_resources, table_config
|
|
@@ -554,9 +662,17 @@ class DataWriter:
|
|
|
554
662
|
if tombstone_threshold_hit or compaction_gate:
|
|
555
663
|
dv_to_drain = combined_tombstone_df
|
|
556
664
|
if dv_to_drain is None and tombstone_path:
|
|
557
|
-
# Pure carry-forward:
|
|
558
|
-
#
|
|
559
|
-
|
|
665
|
+
# Pure carry-forward: the pointer is unchanged, so the
|
|
666
|
+
# live vector is exactly the one already loaded at the
|
|
667
|
+
# top of this block — reuse it instead of a second
|
|
668
|
+
# storage read (fall back to a read only if it wasn't
|
|
669
|
+
# loaded, which shouldn't happen when tombstone_path is
|
|
670
|
+
# set, but stays correct if it ever does).
|
|
671
|
+
dv_to_drain = (
|
|
672
|
+
prev_dv_df
|
|
673
|
+
if prev_dv_df is not None
|
|
674
|
+
else _read_parquet_safe(tombstone_path, profiler=profiler)
|
|
675
|
+
)
|
|
560
676
|
if dv_to_drain is not None and dv_to_drain.height > 0:
|
|
561
677
|
removed, tomb_new, tomb_sunset = compact_tombstones(
|
|
562
678
|
snapshot=last_simple_table,
|
|
@@ -743,7 +859,7 @@ class DataWriter:
|
|
|
743
859
|
schema_json = "{}"
|
|
744
860
|
_org, _sup = self.super_table.organization, self.super_table.super_name
|
|
745
861
|
self.catalog.r.set(RK.schema(_org, _sup, simple_name), schema_json)
|
|
746
|
-
self.catalog.r.sadd(RK.
|
|
862
|
+
self.catalog.r.sadd(RK.meta_table_names(_org, _sup), simple_name)
|
|
747
863
|
except Exception as e:
|
|
748
864
|
logger.debug(f"[data-writer] schema/table_names Redis write failed: {e}")
|
|
749
865
|
|
|
@@ -793,7 +909,7 @@ class DataWriter:
|
|
|
793
909
|
f"total={total_duration:.3f} | "
|
|
794
910
|
f"convert={timings.get('convert', 0):.3f} | dedup_ts={timings.get('dedup_ts', 0):.3f} | validate={timings.get('validate', 0):.3f} | "
|
|
795
911
|
f"lock={timings.get('lock', 0):.3f} | snapshot={timings.get('snapshot', 0):.3f} | "
|
|
796
|
-
f"overlap={timings.get('overlap', 0):.3f} | stats_prune={timings.get('stats_prune', 0):.3f} | newer_than={timings.get('newer_than', 0):.3f} | "
|
|
912
|
+
f"overlap={timings.get('overlap', 0):.3f} | stats_prune={timings.get('stats_prune', 0):.3f} | resolve_overwrite={timings.get('resolve_overwrite', 0):.3f} | newer_than={timings.get('newer_than', 0):.3f} | "
|
|
797
913
|
f"identify_deletes={timings.get('identify_deletes', 0):.3f} | write_parquet={timings.get('write_parquet', 0):.3f} | "
|
|
798
914
|
f"build_tombstone={timings.get('build_tombstone', 0):.3f} | compact_tombstones={timings.get('compact_tombstones', 0):.3f} | compact_small={timings.get('compact_small', 0):.3f} | build_stats={timings.get('build_stats', 0):.3f} | "
|
|
799
915
|
f"update_simple={timings.get('update_simple', 0):.3f} | bump_root={timings.get('bump_root', 0):.3f} | "
|
|
@@ -14,7 +14,9 @@ from enum import Enum
|
|
|
14
14
|
|
|
15
15
|
from supertable.config import defaults
|
|
16
16
|
|
|
17
|
-
|
|
17
|
+
# Follow the configured SUPERTABLE_LOG_LEVEL (resolved in supertable.config.defaults)
|
|
18
|
+
# instead of hard-pinning INFO, so DEBUG surfaces the detailed write step[...] logs.
|
|
19
|
+
logging.getLogger("supertable").setLevel(defaults.default.LOG_LEVEL)
|
|
18
20
|
|
|
19
21
|
defaults.default.IS_SHOW_TIMING = True
|
|
20
22
|
|
|
@@ -10,6 +10,7 @@ import duckdb
|
|
|
10
10
|
import pandas as pd
|
|
11
11
|
|
|
12
12
|
from supertable.config.defaults import logger
|
|
13
|
+
from supertable.config.settings import settings
|
|
13
14
|
from supertable.query_plan_manager import QueryPlanManager
|
|
14
15
|
from supertable.utils.sql_parser import SQLParser
|
|
15
16
|
from supertable.data_classes import Reflection
|
|
@@ -24,6 +25,7 @@ from supertable.engine.engine_common import (
|
|
|
24
25
|
apply_runtime_pragmas,
|
|
25
26
|
create_rbac_view,
|
|
26
27
|
create_tombstone_view,
|
|
28
|
+
TombstoneCache,
|
|
27
29
|
)
|
|
28
30
|
|
|
29
31
|
|
|
@@ -56,6 +58,13 @@ class DuckDBLite:
|
|
|
56
58
|
self._lock = threading.Lock()
|
|
57
59
|
self._con: Optional[duckdb.DuckDBPyConnection] = None
|
|
58
60
|
self._httpfs_configured = False
|
|
61
|
+
# Shared deletion-vector table cache: per-table eviction (idle TTL +
|
|
62
|
+
# per-table version cap), bounded by config. Tables live on the
|
|
63
|
+
# persistent connection and are forgotten when it resets.
|
|
64
|
+
self._tombstone_cache = TombstoneCache(
|
|
65
|
+
settings.SUPERTABLE_DUCKDB_TOMBSTONE_CACHE_MAX_PER_TABLE,
|
|
66
|
+
settings.SUPERTABLE_DUCKDB_TOMBSTONE_CACHE_TTL_SEC,
|
|
67
|
+
)
|
|
59
68
|
|
|
60
69
|
# ------------------------------------------------------------------
|
|
61
70
|
# Connection lifecycle
|
|
@@ -92,6 +101,8 @@ class DuckDBLite:
|
|
|
92
101
|
pass
|
|
93
102
|
self._con = None
|
|
94
103
|
self._httpfs_configured = False
|
|
104
|
+
# Tables died with the connection — just forget the registry.
|
|
105
|
+
self._tombstone_cache.clear_registry()
|
|
95
106
|
logger.warning("[duckdb.lite] connection reset")
|
|
96
107
|
|
|
97
108
|
# ------------------------------------------------------------------
|
|
@@ -163,6 +174,8 @@ class DuckDBLite:
|
|
|
163
174
|
|
|
164
175
|
# Create per-query VIEWs. Dropped in finally regardless of outcome.
|
|
165
176
|
created_views: List[str] = []
|
|
177
|
+
# Deletion-vector cache keys acquired this query — released in finally.
|
|
178
|
+
acquired_dv_keys: List[str] = []
|
|
166
179
|
try:
|
|
167
180
|
for alias, table_name in alias_to_table_name.items():
|
|
168
181
|
files = alias_to_files[alias]
|
|
@@ -193,7 +206,15 @@ class DuckDBLite:
|
|
|
193
206
|
source = query_alias_to_name[alias]
|
|
194
207
|
tomb_def = tombstone_views.get(alias)
|
|
195
208
|
view = f"tomb_{source}_{query_suffix}"
|
|
196
|
-
|
|
209
|
+
# Reuse a materialised deletion-vector table when the cache is
|
|
210
|
+
# enabled and the alias has a stable key; otherwise the call
|
|
211
|
+
# falls back to the inline read_parquet path (dv_table=None).
|
|
212
|
+
cache_key = getattr(tomb_def, "cache_key", None) if tomb_def else None
|
|
213
|
+
tomb_path = getattr(tomb_def, "tombstone_path", None) if tomb_def else None
|
|
214
|
+
dv_table = self._tombstone_cache.acquire(con, cache_key, tomb_path)
|
|
215
|
+
if dv_table:
|
|
216
|
+
acquired_dv_keys.append(cache_key)
|
|
217
|
+
create_tombstone_view(con, source, view, tomb_def, dv_table=dv_table)
|
|
197
218
|
created_views.append(view)
|
|
198
219
|
query_alias_to_name[alias] = view
|
|
199
220
|
|
|
@@ -260,4 +281,11 @@ class DuckDBLite:
|
|
|
260
281
|
con.execute(f"DROP VIEW IF EXISTS {view};")
|
|
261
282
|
except Exception:
|
|
262
283
|
pass
|
|
284
|
+
# Release deletion-vector refs now the views referencing them are
|
|
285
|
+
# gone; this may evict + DROP unreferenced DV tables over capacity.
|
|
286
|
+
for cache_key in acquired_dv_keys:
|
|
287
|
+
try:
|
|
288
|
+
self._tombstone_cache.release(con, cache_key)
|
|
289
|
+
except Exception:
|
|
290
|
+
pass
|
|
263
291
|
|
|
@@ -29,6 +29,7 @@ from supertable.engine.engine_common import (
|
|
|
29
29
|
create_rbac_view,
|
|
30
30
|
create_tombstone_view,
|
|
31
31
|
rbac_view_name,
|
|
32
|
+
TombstoneCache,
|
|
32
33
|
)
|
|
33
34
|
|
|
34
35
|
|
|
@@ -80,6 +81,14 @@ class DuckDBPro:
|
|
|
80
81
|
# Multiple entries per key when old version still has in-flight queries.
|
|
81
82
|
self._registry: Dict[Tuple[str, str], List[_ProCacheEntry]] = {}
|
|
82
83
|
|
|
84
|
+
# Shared deletion-vector table cache: per-table eviction (idle TTL +
|
|
85
|
+
# per-table version cap), bounded by config. Tables live on the
|
|
86
|
+
# persistent connection and are forgotten when it resets.
|
|
87
|
+
self._tombstone_cache = TombstoneCache(
|
|
88
|
+
settings.SUPERTABLE_DUCKDB_TOMBSTONE_CACHE_MAX_PER_TABLE,
|
|
89
|
+
settings.SUPERTABLE_DUCKDB_TOMBSTONE_CACHE_TTL_SEC,
|
|
90
|
+
)
|
|
91
|
+
|
|
83
92
|
# Temp dir for spill — set on first query
|
|
84
93
|
self._temp_dir: Optional[str] = None
|
|
85
94
|
|
|
@@ -123,6 +132,8 @@ class DuckDBPro:
|
|
|
123
132
|
self._con = None
|
|
124
133
|
self._httpfs_configured = False
|
|
125
134
|
self._registry.clear()
|
|
135
|
+
# DV tables died with the connection — just forget the registry.
|
|
136
|
+
self._tombstone_cache.clear_registry()
|
|
126
137
|
logger.warning("[duckdb.pro] connection reset — all cached views lost")
|
|
127
138
|
|
|
128
139
|
# ---------------------------------------------------------
|
|
@@ -313,6 +324,8 @@ class DuckDBPro:
|
|
|
313
324
|
# assignments are reached (which would cause a NameError otherwise).
|
|
314
325
|
rbac_view_names: List[str] = []
|
|
315
326
|
tombstone_view_names: List[str] = []
|
|
327
|
+
# Deletion-vector cache keys acquired this query — released in finally.
|
|
328
|
+
acquired_dv_keys: List[str] = []
|
|
316
329
|
try:
|
|
317
330
|
query_alias_to_name = dict(alias_to_table_name)
|
|
318
331
|
# Per-query suffix so concurrent queries never collide on a shared
|
|
@@ -328,8 +341,18 @@ class DuckDBPro:
|
|
|
328
341
|
source = query_alias_to_name[alias]
|
|
329
342
|
tomb_def = tombstone_views.get(alias)
|
|
330
343
|
view = f"tomb_{source}_{query_suffix}"
|
|
344
|
+
# Reuse a materialised deletion-vector table when the cache is
|
|
345
|
+
# enabled and the alias has a stable key; otherwise fall back to
|
|
346
|
+
# the inline read_parquet path (dv_table=None). All DDL — the DV
|
|
347
|
+
# CREATE TABLE inside acquire() and the view creation — runs
|
|
348
|
+
# under the connection lock, matching Pro's serialised model.
|
|
349
|
+
cache_key = getattr(tomb_def, "cache_key", None) if tomb_def else None
|
|
350
|
+
tomb_path = getattr(tomb_def, "tombstone_path", None) if tomb_def else None
|
|
331
351
|
with self._lock:
|
|
332
|
-
|
|
352
|
+
dv_table = self._tombstone_cache.acquire(con, cache_key, tomb_path)
|
|
353
|
+
if dv_table:
|
|
354
|
+
acquired_dv_keys.append(cache_key)
|
|
355
|
+
create_tombstone_view(con, source, view, tomb_def, dv_table=dv_table)
|
|
333
356
|
tombstone_view_names.append(view)
|
|
334
357
|
query_alias_to_name[alias] = view
|
|
335
358
|
|
|
@@ -395,6 +418,16 @@ class DuckDBPro:
|
|
|
395
418
|
except Exception:
|
|
396
419
|
pass
|
|
397
420
|
|
|
421
|
+
# Release deletion-vector refs now their views are gone; this may
|
|
422
|
+
# evict + DROP unreferenced DV tables over capacity.
|
|
423
|
+
if acquired_dv_keys:
|
|
424
|
+
with self._lock:
|
|
425
|
+
for cache_key in acquired_dv_keys:
|
|
426
|
+
try:
|
|
427
|
+
self._tombstone_cache.release(con, cache_key)
|
|
428
|
+
except Exception:
|
|
429
|
+
pass
|
|
430
|
+
|
|
398
431
|
# Release refs and drop stale tables
|
|
399
432
|
with self._lock:
|
|
400
433
|
self._release_refs(tables_used)
|