supertable 2.3.6__tar.gz → 2.3.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {supertable-2.3.6/supertable.egg-info → supertable-2.3.7}/PKG-INFO +1 -1
- {supertable-2.3.6 → supertable-2.3.7}/pyproject.toml +1 -1
- {supertable-2.3.6 → supertable-2.3.7}/setup.py +1 -1
- {supertable-2.3.6 → supertable-2.3.7}/supertable/__init__.py +1 -1
- {supertable-2.3.6 → supertable-2.3.7}/supertable/config/settings.py +8 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/processing.py +15 -7
- {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/test_resolve_overwrite_writes.py +16 -0
- supertable-2.3.7/supertable/tests/test_write_probe_gate.py +130 -0
- {supertable-2.3.6 → supertable-2.3.7/supertable.egg-info}/PKG-INFO +1 -1
- {supertable-2.3.6 → supertable-2.3.7}/supertable.egg-info/SOURCES.txt +1 -0
- {supertable-2.3.6 → supertable-2.3.7}/LICENSE +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/README.md +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/requirements.txt +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/setup.cfg +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/audit/__init__.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/audit/admin.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/audit/chain.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/audit/consumers.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/audit/crypto.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/audit/events.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/audit/export.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/audit/logger.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/audit/middleware.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/audit/reader.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/audit/retention.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/audit/tests/__init__.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/audit/tests/test_chain.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/audit/tests/test_crypto.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/audit/tests/test_emit.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/audit/tests/test_events.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/audit/tests/test_retention.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/audit/writer_parquet.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/audit/writer_redis.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/config/__init__.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/config/defaults.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/config/homedir.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/config/tests/__init__.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/config/tests/test_defaults.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/config/tests/test_homedir.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/config/tests/test_settings.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/data_classes.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/data_reader.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/data_writer.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/__init__.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/__init__.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/__main__.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/check_filter_builder.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/controller.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/data_writer_helpers.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/defaults.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/dummy_data.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/read_parquet_header.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/s01_01_01_create_super_table.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/s01_01_02_enable_mirroring_formats.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/s01_02_create_roles.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/s01_03_create_users.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/s02_01_write_dummy_data.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/s02_02_write_single_data.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/s02_03_01_write_staging.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/s02_03_02_create_pipe.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/s02_04_01_write_monitoring_simple.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/s02_04_02_write_monitoring_parallel.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/s02_05_write_tombstone.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/s03_01_read_data_error.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/s03_02_01_read_super_data_ok.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/s03_02_02_read_table_data_ok.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/s03_03_read_meta.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/s03_04_read_staging.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/s03_06_01_read_roles.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/s03_06_02_read_user.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/s03_07_01_estimate_read.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/s03_07_02_estimate_files.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/s03_08_read_snapshot_history.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/s04_01_03_delete_pipe.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/s05_01_delete_table.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/s05_02_delete_super_table.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/webshop/__init__.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/webshop/core.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/webshop/defaults.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/webshop/generate.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/webshop/load.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/webshop/topup.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/engine/__init__.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/engine/data_estimator.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/engine/duckdb_lite.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/engine/duckdb_pro.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/engine/engine_common.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/engine/engine_config.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/engine/engine_enum.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/engine/executor.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/engine/plan_stats.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/engine/spark_thrift.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/engine/tests/__init__.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/engine/tests/conftest.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/engine/tests/test_engine.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/engine/tests/test_engine_config.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/engine/tests/test_engine_routing.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/engine/tests/test_engine_spill.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/errors.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/locking/__init__.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/locking/benchmarks/__init__.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/locking/benchmarks/benchmark_locking.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/locking/benchmarks/measure_lock_speed.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/locking/benchmarks/measure_lock_time.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/locking/file_lock.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/locking/redis_lock.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/locking/tests/__init__.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/locking/tests/test_file_lock.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/locking/tests/test_redis_lock.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/logging.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/meta_reader.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/mirroring/__init__.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/mirroring/mirror_delta.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/mirroring/mirror_formats.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/mirroring/mirror_iceberg.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/mirroring/mirror_parquet.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/monitoring/__init__.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/monitoring/partitions.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/monitoring_writer.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/plan_extender.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/query_plan_manager.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/rbac/__init__.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/rbac/access_control.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/rbac/filter_builder.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/rbac/permissions.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/rbac/role_manager.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/rbac/row_column_security.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/rbac/tests/test_filter_builder.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/rbac/tests/test_rbac.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/rbac/tests/test_rbac_per_table.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/rbac/user_manager.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/redis_catalog.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/redis_connector.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/redis_infra.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/redis_keys.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/simple_table.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/staging_area.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/storage/__init__.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/storage/azure_storage.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/storage/gcp_storage.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/storage/local_storage.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/storage/minio_storage.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/storage/s3_storage.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/storage/storage_factory.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/storage/storage_interface.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/storage/tests/test_storage.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/super_pipe.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/super_table.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/system_query.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/__init__.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/test_align_to_schema_fix.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/test_create_if_missing.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/test_data_reader.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/test_data_reader_preflight.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/test_data_writer.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/test_data_writer_compact.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/test_data_writer_comprehensive.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/test_errors.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/test_meta_reader.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/test_monitoring_partitions.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/test_monitoring_sink_guard.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/test_newer_than.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/test_parquet_statistics.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/test_processing.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/test_processing_compact_resources.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/test_processing_stats.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/test_query_sql.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/test_read_pruning_differential.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/test_read_pruning_integration.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/test_redis_key_prefix.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/test_simple_table.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/test_stats_cache.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/test_stats_pruning.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/test_stats_schema_snapshot.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/test_super_table.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/test_supertable_all.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/test_system_query.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/utils/__init__.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/utils/helper.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/utils/profiler.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/utils/sql_parser.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/utils/tests/test_sql_parser_columns.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable/utils/timer.py +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable.egg-info/dependency_links.txt +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable.egg-info/entry_points.txt +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable.egg-info/requires.txt +0 -0
- {supertable-2.3.6 → supertable-2.3.7}/supertable.egg-info/top_level.txt +0 -0
|
@@ -19,7 +19,7 @@ long_description = readme.read_text(encoding="utf-8") if readme.exists() else ""
|
|
|
19
19
|
|
|
20
20
|
setup(
|
|
21
21
|
name="supertable",
|
|
22
|
-
version="2.3.6",
|
|
22
|
+
version="2.3.7",
|
|
23
23
|
description="SuperTable — versioned data lake library for SQL analytics on Parquet + Redis.",
|
|
24
24
|
long_description=long_description,
|
|
25
25
|
long_description_content_type="text/markdown",
|
|
@@ -25,7 +25,7 @@ See the ``supertable.demo`` package for runnable end-to-end demos and the
|
|
|
25
25
|
project documentation for the full API surface.
|
|
26
26
|
"""
|
|
27
27
|
|
|
28
|
-
__version__ = "2.3.6"
|
|
28
|
+
__version__ = "2.3.7"
|
|
29
29
|
|
|
30
30
|
# Re-export the core public surface so users can do ``from supertable import …``
|
|
31
31
|
# instead of remembering submodule paths.
|
|
@@ -157,6 +157,13 @@ class Settings:
|
|
|
157
157
|
SUPERTABLE_DUCKDB_MATERIALIZE: str = "view" # SUPERTABLE_DUCKDB_MATERIALIZE
|
|
158
158
|
SUPERTABLE_DUCKDB_PRESIGNED: bool = False # SUPERTABLE_DUCKDB_PRESIGNED
|
|
159
159
|
SUPERTABLE_DUCKDB_USE_HTTPFS: bool = False # SUPERTABLE_DUCKDB_USE_HTTPFS
|
|
160
|
+
# Write-path overwrite/delete resolution via the DuckDB pushdown probe.
|
|
161
|
+
# Disabled by default: the polars fallback reads only the projected key
|
|
162
|
+
# columns through the storage SDK and needs no httpfs extension, so it works
|
|
163
|
+
# in environments without one (or without internet to install it). Enable
|
|
164
|
+
# only where httpfs is available and the probe's row-group skipping is worth
|
|
165
|
+
# it (e.g. very wide tables / many overlapping files).
|
|
166
|
+
SUPERTABLE_DUCKDB_WRITE_PROBE: bool = False # SUPERTABLE_DUCKDB_WRITE_PROBE
|
|
160
167
|
# Deletion-vector (tombstone) table cache. Each entry is a small
|
|
161
168
|
# `DISTINCT __rowid__` table keyed by the stable tombstone path; the
|
|
162
169
|
# tombstone view ANTI JOINs it instead of re-reading the parquet every
|
|
@@ -437,6 +444,7 @@ def _build_settings() -> Settings:
|
|
|
437
444
|
SUPERTABLE_DUCKDB_MATERIALIZE=_env_str("SUPERTABLE_DUCKDB_MATERIALIZE", "view"),
|
|
438
445
|
SUPERTABLE_DUCKDB_PRESIGNED=_env_bool("SUPERTABLE_DUCKDB_PRESIGNED", False),
|
|
439
446
|
SUPERTABLE_DUCKDB_USE_HTTPFS=_env_bool("SUPERTABLE_DUCKDB_USE_HTTPFS", False),
|
|
447
|
+
SUPERTABLE_DUCKDB_WRITE_PROBE=_env_bool("SUPERTABLE_DUCKDB_WRITE_PROBE", False),
|
|
440
448
|
SUPERTABLE_DUCKDB_TOMBSTONE_CACHE_MAX_PER_TABLE=_env_int("SUPERTABLE_DUCKDB_TOMBSTONE_CACHE_MAX_PER_TABLE", 8),
|
|
441
449
|
SUPERTABLE_DUCKDB_TOMBSTONE_CACHE_TTL_SEC=_env_int("SUPERTABLE_DUCKDB_TOMBSTONE_CACHE_TTL_SEC", 300),
|
|
442
450
|
SUPERTABLE_DEBUG_TIMINGS=_env_bool("SUPERTABLE_DEBUG_TIMINGS", False),
|
|
@@ -1316,11 +1316,13 @@ def resolve_overwrite_writes(
|
|
|
1316
1316
|
) -> Tuple[polars.DataFrame, List[Tuple[str, int]]]:
|
|
1317
1317
|
"""Single-pass overwrite resolution: stale filtering + delete-vector pairs.
|
|
1318
1318
|
|
|
1319
|
-
Returns ``(filtered_incoming_df, delete_pairs)
|
|
1320
|
-
|
|
1319
|
+
Returns ``(filtered_incoming_df, delete_pairs)``. When the DuckDB pushdown
|
|
1320
|
+
probe is enabled (``SUPERTABLE_DUCKDB_WRITE_PROBE``) it is computed from ONE
|
|
1321
|
+
probe over the overlapping files. Falls back to the original polars
|
|
1321
1322
|
full-read path (``filter_stale_incoming_rows`` + ``identify_deleted_rowids``)
|
|
1322
|
-
when
|
|
1323
|
-
probed — semantics are identical on both
|
|
1323
|
+
when the probe is disabled (the default), DuckDB is unavailable, the probe
|
|
1324
|
+
fails, or the file schema can't be probed — semantics are identical on both
|
|
1325
|
+
paths.
|
|
1324
1326
|
|
|
1325
1327
|
*newer_than_col* falsy ⇒ no stale filtering (delete/upsert without conflict
|
|
1326
1328
|
resolution); the incoming df is returned unchanged and every overlapping row
|
|
@@ -1343,9 +1345,15 @@ def resolve_overwrite_writes(
|
|
|
1343
1345
|
f"{incoming_keys.height} unique incoming key(s) on {overwrite_columns}, "
|
|
1344
1346
|
f"newer_than={newer_than_col}"
|
|
1345
1347
|
)
|
|
1346
|
-
|
|
1347
|
-
|
|
1348
|
-
|
|
1348
|
+
# The DuckDB pushdown probe is opt-in (SUPERTABLE_DUCKDB_WRITE_PROBE). When
|
|
1349
|
+
# disabled (the default), skip it entirely and use the polars fallback below,
|
|
1350
|
+
# which reads only the projected key columns via the storage SDK and needs no
|
|
1351
|
+
# httpfs extension — the safe path for environments without one.
|
|
1352
|
+
matched = None
|
|
1353
|
+
if settings.SUPERTABLE_DUCKDB_WRITE_PROBE:
|
|
1354
|
+
matched = _duckdb_probe_overlap_matches(
|
|
1355
|
+
overlap_true, overwrite_columns, newer_than_col, incoming_keys, profiler=p,
|
|
1356
|
+
)
|
|
1349
1357
|
if matched is not None:
|
|
1350
1358
|
try:
|
|
1351
1359
|
return _derive_stale_and_deletes(
|
|
@@ -20,21 +20,37 @@ local files regardless of the ambient STORAGE_TYPE.
|
|
|
20
20
|
"""
|
|
21
21
|
from __future__ import annotations
|
|
22
22
|
|
|
23
|
+
import dataclasses
|
|
23
24
|
from unittest.mock import patch
|
|
24
25
|
|
|
25
26
|
import polars as pl
|
|
26
27
|
import pyarrow.parquet as pq
|
|
27
28
|
import pytest
|
|
28
29
|
|
|
30
|
+
import supertable.processing as st_processing
|
|
29
31
|
from supertable.processing import (
|
|
30
32
|
resolve_overwrite_writes,
|
|
31
33
|
filter_stale_incoming_rows,
|
|
32
34
|
identify_deleted_rowids,
|
|
33
35
|
)
|
|
36
|
+
from supertable.config.settings import settings
|
|
34
37
|
from supertable.storage.local_storage import LocalStorage
|
|
35
38
|
from supertable.utils.profiler import Profiler
|
|
36
39
|
|
|
37
40
|
|
|
41
|
+
@pytest.fixture(autouse=True)
|
|
42
|
+
def _enable_write_probe(monkeypatch):
|
|
43
|
+
"""These tests validate the DuckDB pushdown probe path, which is opt-in
|
|
44
|
+
(``SUPERTABLE_DUCKDB_WRITE_PROBE``, default off). Force it on so the probe
|
|
45
|
+
is actually exercised; without this the gate in ``resolve_overwrite_writes``
|
|
46
|
+
would route every call to the polars fallback and the probe assertions
|
|
47
|
+
(``probe_files`` present, no ``overwrite_resolve_fallback``) would be vacuous."""
|
|
48
|
+
monkeypatch.setattr(
|
|
49
|
+
st_processing, "settings",
|
|
50
|
+
dataclasses.replace(settings, SUPERTABLE_DUCKDB_WRITE_PROBE=True),
|
|
51
|
+
)
|
|
52
|
+
|
|
53
|
+
|
|
38
54
|
# ---------------------------------------------------------------------------
|
|
39
55
|
# Helpers
|
|
40
56
|
# ---------------------------------------------------------------------------
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
# supertable/tests/test_write_probe_gate.py
|
|
2
|
+
"""Gate test for ``SUPERTABLE_DUCKDB_WRITE_PROBE``.
|
|
3
|
+
|
|
4
|
+
The DuckDB pushdown probe in the overwrite/delete write path is opt-in and
|
|
5
|
+
disabled by default. Environments without the httpfs extension (or without
|
|
6
|
+
internet to install it) must NOT stall on a DuckDB httpfs install; they use the
|
|
7
|
+
polars fallback, which reads only the projected key columns through the storage
|
|
8
|
+
SDK. These tests pin the gate's contract:
|
|
9
|
+
|
|
10
|
+
* flag OFF (default) -> the probe is never called; resolution goes through the
|
|
11
|
+
polars fallback (profiler 'overwrite_resolve_fallback' set, no 'probe_files').
|
|
12
|
+
* flag ON -> the probe IS called ('probe_files' set).
|
|
13
|
+
* both produce identical (filtered rows, delete pairs) -- the gate changes
|
|
14
|
+
only the mechanism, never the result (the fallback is the semantic oracle).
|
|
15
|
+
"""
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import dataclasses
|
|
19
|
+
from unittest.mock import patch
|
|
20
|
+
|
|
21
|
+
import polars as pl
|
|
22
|
+
import pyarrow.parquet as pq
|
|
23
|
+
import pytest
|
|
24
|
+
|
|
25
|
+
import supertable.processing as st_processing
|
|
26
|
+
from supertable.config.settings import settings
|
|
27
|
+
from supertable.processing import resolve_overwrite_writes
|
|
28
|
+
from supertable.storage.local_storage import LocalStorage
|
|
29
|
+
from supertable.utils.profiler import Profiler
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@pytest.fixture(autouse=True)
|
|
33
|
+
def _local_storage():
|
|
34
|
+
"""Force both probe and fallback reads through a real LocalStorage so the
|
|
35
|
+
on-disk tmp parquet files are read identically regardless of STORAGE_TYPE."""
|
|
36
|
+
with patch("supertable.processing._get_storage", return_value=LocalStorage()):
|
|
37
|
+
yield
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _write(d, name, df):
|
|
41
|
+
path = str(d / name)
|
|
42
|
+
pq.write_table(df.to_arrow(), path)
|
|
43
|
+
return (path, True, (d / name).stat().st_size)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _set_probe(monkeypatch, enabled: bool):
|
|
47
|
+
monkeypatch.setattr(
|
|
48
|
+
st_processing, "settings",
|
|
49
|
+
dataclasses.replace(settings, SUPERTABLE_DUCKDB_WRITE_PROBE=enabled),
|
|
50
|
+
)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _spy_probe(monkeypatch):
|
|
54
|
+
"""Wrap the real probe to count calls without altering its behavior."""
|
|
55
|
+
calls = {"n": 0}
|
|
56
|
+
real = st_processing._duckdb_probe_overlap_matches
|
|
57
|
+
|
|
58
|
+
def _counting(*a, **k):
|
|
59
|
+
calls["n"] += 1
|
|
60
|
+
return real(*a, **k)
|
|
61
|
+
|
|
62
|
+
monkeypatch.setattr(st_processing, "_duckdb_probe_overlap_matches", _counting)
|
|
63
|
+
return calls
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _resolve(incoming, files, keys, ntc, prof):
|
|
67
|
+
return resolve_overwrite_writes(
|
|
68
|
+
incoming_df=incoming, overlapping_files=files,
|
|
69
|
+
overwrite_columns=keys, newer_than_col=ntc, profiler=prof,
|
|
70
|
+
)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def test_probe_disabled_by_default_uses_fallback(tmp_path, monkeypatch):
|
|
74
|
+
f = _write(tmp_path, "a.parquet", pl.DataFrame(
|
|
75
|
+
{"__rowid__": [1], "user_id": [5], "name": ["Alice"], "updated_at": [7]}))
|
|
76
|
+
incoming = pl.DataFrame({"user_id": [5], "name": ["Bob"], "updated_at": [9]})
|
|
77
|
+
|
|
78
|
+
_set_probe(monkeypatch, False)
|
|
79
|
+
calls = _spy_probe(monkeypatch)
|
|
80
|
+
prof = Profiler()
|
|
81
|
+
filt, pairs = _resolve(incoming, {f}, ["user_id"], "updated_at", prof)
|
|
82
|
+
|
|
83
|
+
counts = prof.emit_counts()
|
|
84
|
+
assert calls["n"] == 0, "probe must NOT be called when the flag is off"
|
|
85
|
+
assert "overwrite_resolve_fallback" in counts, f"fallback not taken; counts={counts}"
|
|
86
|
+
assert "probe_files" not in counts, f"probe ran despite flag off; counts={counts}"
|
|
87
|
+
# Correct result via the fallback: the newer incoming row survives and
|
|
88
|
+
# tombstones the existing row's __rowid__.
|
|
89
|
+
assert filt.height == 1
|
|
90
|
+
assert pairs == [(f[0], 1)]
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def test_probe_enabled_calls_probe(tmp_path, monkeypatch):
|
|
94
|
+
f = _write(tmp_path, "a.parquet", pl.DataFrame(
|
|
95
|
+
{"__rowid__": [1], "user_id": [5], "name": ["Alice"], "updated_at": [7]}))
|
|
96
|
+
incoming = pl.DataFrame({"user_id": [5], "name": ["Bob"], "updated_at": [9]})
|
|
97
|
+
|
|
98
|
+
_set_probe(monkeypatch, True)
|
|
99
|
+
calls = _spy_probe(monkeypatch)
|
|
100
|
+
prof = Profiler()
|
|
101
|
+
filt, pairs = _resolve(incoming, {f}, ["user_id"], "updated_at", prof)
|
|
102
|
+
|
|
103
|
+
counts = prof.emit_counts()
|
|
104
|
+
assert calls["n"] == 1, "probe must be called when the flag is on"
|
|
105
|
+
assert "probe_files" in counts, f"probe did not run; counts={counts}"
|
|
106
|
+
assert filt.height == 1
|
|
107
|
+
assert pairs == [(f[0], 1)]
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def test_gate_result_identical_on_and_off(tmp_path, monkeypatch):
|
|
111
|
+
"""The flag changes only the mechanism: filtered rows + delete pairs match.
|
|
112
|
+
|
|
113
|
+
user_id=5 is newer (9 > 7) -> survives + tombstones rowid 1; user_id=6 is
|
|
114
|
+
stale (3 < 7) -> dropped, no tombstone. Identical on both code paths.
|
|
115
|
+
"""
|
|
116
|
+
f = _write(tmp_path, "d.parquet", pl.DataFrame(
|
|
117
|
+
{"__rowid__": [1, 2], "user_id": [5, 6], "name": ["A", "B"],
|
|
118
|
+
"updated_at": [7, 7]}))
|
|
119
|
+
incoming = pl.DataFrame(
|
|
120
|
+
{"user_id": [5, 6], "name": ["X", "Y"], "updated_at": [9, 3]})
|
|
121
|
+
|
|
122
|
+
def _run(enabled):
|
|
123
|
+
_set_probe(monkeypatch, enabled)
|
|
124
|
+
filt, pairs = _resolve(incoming, {f}, ["user_id"], "updated_at", Profiler())
|
|
125
|
+
rows = sorted(
|
|
126
|
+
filt.select(["user_id", "name", "updated_at"]).to_dicts(), key=repr
|
|
127
|
+
)
|
|
128
|
+
return rows, sorted(pairs)
|
|
129
|
+
|
|
130
|
+
assert _run(True) == _run(False)
|
|
@@ -176,6 +176,7 @@ supertable/tests/test_stats_schema_snapshot.py
|
|
|
176
176
|
supertable/tests/test_super_table.py
|
|
177
177
|
supertable/tests/test_supertable_all.py
|
|
178
178
|
supertable/tests/test_system_query.py
|
|
179
|
+
supertable/tests/test_write_probe_gate.py
|
|
179
180
|
supertable/utils/__init__.py
|
|
180
181
|
supertable/utils/helper.py
|
|
181
182
|
supertable/utils/profiler.py
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/s01_01_01_create_super_table.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/s02_02_write_single_data.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/s03_02_01_read_super_data_ok.py
RENAMED
|
File without changes
|
{supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/s03_02_02_read_table_data_ok.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/s03_07_02_estimate_files.py
RENAMED
|
File without changes
|
{supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/s03_08_read_snapshot_history.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/s05_02_delete_super_table.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|