supertable 2.3.3__tar.gz → 2.3.5__tar.gz

This diff compares the contents of two publicly released versions of a package as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
Files changed (187)
  1. {supertable-2.3.3/supertable.egg-info → supertable-2.3.5}/PKG-INFO +1 -1
  2. {supertable-2.3.3 → supertable-2.3.5}/pyproject.toml +1 -1
  3. {supertable-2.3.3 → supertable-2.3.5}/setup.py +1 -1
  4. {supertable-2.3.3 → supertable-2.3.5}/supertable/__init__.py +1 -1
  5. {supertable-2.3.3 → supertable-2.3.5}/supertable/config/defaults.py +6 -0
  6. supertable-2.3.5/supertable/config/homedir.py +96 -0
  7. {supertable-2.3.3 → supertable-2.3.5}/supertable/config/settings.py +21 -0
  8. {supertable-2.3.3 → supertable-2.3.5}/supertable/data_classes.py +9 -1
  9. {supertable-2.3.3 → supertable-2.3.5}/supertable/data_reader.py +3 -0
  10. {supertable-2.3.3 → supertable-2.3.5}/supertable/data_writer.py +122 -6
  11. {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/defaults.py +3 -1
  12. {supertable-2.3.3 → supertable-2.3.5}/supertable/engine/duckdb_lite.py +29 -1
  13. {supertable-2.3.3 → supertable-2.3.5}/supertable/engine/duckdb_pro.py +34 -1
  14. {supertable-2.3.3 → supertable-2.3.5}/supertable/engine/engine_common.py +349 -66
  15. {supertable-2.3.3 → supertable-2.3.5}/supertable/engine/tests/test_engine.py +548 -0
  16. {supertable-2.3.3 → supertable-2.3.5}/supertable/processing.py +204 -32
  17. {supertable-2.3.3 → supertable-2.3.5}/supertable/simple_table.py +0 -9
  18. {supertable-2.3.3 → supertable-2.3.5}/supertable/storage/azure_storage.py +7 -2
  19. {supertable-2.3.3 → supertable-2.3.5}/supertable/storage/gcp_storage.py +7 -2
  20. {supertable-2.3.3 → supertable-2.3.5}/supertable/storage/local_storage.py +4 -4
  21. {supertable-2.3.3 → supertable-2.3.5}/supertable/storage/minio_storage.py +7 -2
  22. {supertable-2.3.3 → supertable-2.3.5}/supertable/storage/s3_storage.py +7 -2
  23. {supertable-2.3.3 → supertable-2.3.5}/supertable/storage/storage_interface.py +21 -2
  24. {supertable-2.3.3 → supertable-2.3.5}/supertable/super_table.py +0 -6
  25. {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/test_data_reader.py +5 -0
  26. {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/test_data_writer_comprehensive.py +2 -1
  27. {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/test_processing_stats.py +3 -1
  28. {supertable-2.3.3 → supertable-2.3.5/supertable.egg-info}/PKG-INFO +1 -1
  29. supertable-2.3.3/supertable/config/homedir.py +0 -62
  30. {supertable-2.3.3 → supertable-2.3.5}/LICENSE +0 -0
  31. {supertable-2.3.3 → supertable-2.3.5}/README.md +0 -0
  32. {supertable-2.3.3 → supertable-2.3.5}/requirements.txt +0 -0
  33. {supertable-2.3.3 → supertable-2.3.5}/setup.cfg +0 -0
  34. {supertable-2.3.3 → supertable-2.3.5}/supertable/audit/__init__.py +0 -0
  35. {supertable-2.3.3 → supertable-2.3.5}/supertable/audit/admin.py +0 -0
  36. {supertable-2.3.3 → supertable-2.3.5}/supertable/audit/chain.py +0 -0
  37. {supertable-2.3.3 → supertable-2.3.5}/supertable/audit/consumers.py +0 -0
  38. {supertable-2.3.3 → supertable-2.3.5}/supertable/audit/crypto.py +0 -0
  39. {supertable-2.3.3 → supertable-2.3.5}/supertable/audit/events.py +0 -0
  40. {supertable-2.3.3 → supertable-2.3.5}/supertable/audit/export.py +0 -0
  41. {supertable-2.3.3 → supertable-2.3.5}/supertable/audit/logger.py +0 -0
  42. {supertable-2.3.3 → supertable-2.3.5}/supertable/audit/middleware.py +0 -0
  43. {supertable-2.3.3 → supertable-2.3.5}/supertable/audit/reader.py +0 -0
  44. {supertable-2.3.3 → supertable-2.3.5}/supertable/audit/retention.py +0 -0
  45. {supertable-2.3.3 → supertable-2.3.5}/supertable/audit/tests/__init__.py +0 -0
  46. {supertable-2.3.3 → supertable-2.3.5}/supertable/audit/tests/test_chain.py +0 -0
  47. {supertable-2.3.3 → supertable-2.3.5}/supertable/audit/tests/test_crypto.py +0 -0
  48. {supertable-2.3.3 → supertable-2.3.5}/supertable/audit/tests/test_emit.py +0 -0
  49. {supertable-2.3.3 → supertable-2.3.5}/supertable/audit/tests/test_events.py +0 -0
  50. {supertable-2.3.3 → supertable-2.3.5}/supertable/audit/tests/test_retention.py +0 -0
  51. {supertable-2.3.3 → supertable-2.3.5}/supertable/audit/writer_parquet.py +0 -0
  52. {supertable-2.3.3 → supertable-2.3.5}/supertable/audit/writer_redis.py +0 -0
  53. {supertable-2.3.3 → supertable-2.3.5}/supertable/config/__init__.py +0 -0
  54. {supertable-2.3.3 → supertable-2.3.5}/supertable/config/tests/__init__.py +0 -0
  55. {supertable-2.3.3 → supertable-2.3.5}/supertable/config/tests/test_defaults.py +0 -0
  56. {supertable-2.3.3 → supertable-2.3.5}/supertable/config/tests/test_homedir.py +0 -0
  57. {supertable-2.3.3 → supertable-2.3.5}/supertable/config/tests/test_settings.py +0 -0
  58. {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/__init__.py +0 -0
  59. {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/__init__.py +0 -0
  60. {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/__main__.py +0 -0
  61. {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/check_filter_builder.py +0 -0
  62. {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/controller.py +0 -0
  63. {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/data_writer_helpers.py +0 -0
  64. {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/dummy_data.py +0 -0
  65. {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/read_parquet_header.py +0 -0
  66. {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/s01_01_01_create_super_table.py +0 -0
  67. {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/s01_01_02_enable_mirroring_formats.py +0 -0
  68. {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/s01_02_create_roles.py +0 -0
  69. {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/s01_03_create_users.py +0 -0
  70. {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/s02_01_write_dummy_data.py +0 -0
  71. {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/s02_02_write_single_data.py +0 -0
  72. {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/s02_03_01_write_staging.py +0 -0
  73. {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/s02_03_02_create_pipe.py +0 -0
  74. {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/s02_04_01_write_monitoring_simple.py +0 -0
  75. {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/s02_04_02_write_monitoring_parallel.py +0 -0
  76. {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/s02_05_write_tombstone.py +0 -0
  77. {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/s03_01_read_data_error.py +0 -0
  78. {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/s03_02_01_read_super_data_ok.py +0 -0
  79. {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/s03_02_02_read_table_data_ok.py +0 -0
  80. {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/s03_03_read_meta.py +0 -0
  81. {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/s03_04_read_staging.py +0 -0
  82. {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/s03_06_01_read_roles.py +0 -0
  83. {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/s03_06_02_read_user.py +0 -0
  84. {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/s03_07_01_estimate_read.py +0 -0
  85. {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/s03_07_02_estimate_files.py +0 -0
  86. {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/s03_08_read_snapshot_history.py +0 -0
  87. {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/s04_01_03_delete_pipe.py +0 -0
  88. {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/s05_01_delete_table.py +0 -0
  89. {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/quickstart/s05_02_delete_super_table.py +0 -0
  90. {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/webshop/__init__.py +0 -0
  91. {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/webshop/core.py +0 -0
  92. {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/webshop/defaults.py +0 -0
  93. {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/webshop/generate.py +0 -0
  94. {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/webshop/load.py +0 -0
  95. {supertable-2.3.3 → supertable-2.3.5}/supertable/demo/webshop/topup.py +0 -0
  96. {supertable-2.3.3 → supertable-2.3.5}/supertable/engine/__init__.py +0 -0
  97. {supertable-2.3.3 → supertable-2.3.5}/supertable/engine/data_estimator.py +0 -0
  98. {supertable-2.3.3 → supertable-2.3.5}/supertable/engine/engine_config.py +0 -0
  99. {supertable-2.3.3 → supertable-2.3.5}/supertable/engine/engine_enum.py +0 -0
  100. {supertable-2.3.3 → supertable-2.3.5}/supertable/engine/executor.py +0 -0
  101. {supertable-2.3.3 → supertable-2.3.5}/supertable/engine/plan_stats.py +0 -0
  102. {supertable-2.3.3 → supertable-2.3.5}/supertable/engine/spark_thrift.py +0 -0
  103. {supertable-2.3.3 → supertable-2.3.5}/supertable/engine/tests/__init__.py +0 -0
  104. {supertable-2.3.3 → supertable-2.3.5}/supertable/engine/tests/conftest.py +0 -0
  105. {supertable-2.3.3 → supertable-2.3.5}/supertable/engine/tests/test_engine_config.py +0 -0
  106. {supertable-2.3.3 → supertable-2.3.5}/supertable/engine/tests/test_engine_routing.py +0 -0
  107. {supertable-2.3.3 → supertable-2.3.5}/supertable/engine/tests/test_engine_spill.py +0 -0
  108. {supertable-2.3.3 → supertable-2.3.5}/supertable/errors.py +0 -0
  109. {supertable-2.3.3 → supertable-2.3.5}/supertable/locking/__init__.py +0 -0
  110. {supertable-2.3.3 → supertable-2.3.5}/supertable/locking/benchmarks/__init__.py +0 -0
  111. {supertable-2.3.3 → supertable-2.3.5}/supertable/locking/benchmarks/benchmark_locking.py +0 -0
  112. {supertable-2.3.3 → supertable-2.3.5}/supertable/locking/benchmarks/measure_lock_speed.py +0 -0
  113. {supertable-2.3.3 → supertable-2.3.5}/supertable/locking/benchmarks/measure_lock_time.py +0 -0
  114. {supertable-2.3.3 → supertable-2.3.5}/supertable/locking/file_lock.py +0 -0
  115. {supertable-2.3.3 → supertable-2.3.5}/supertable/locking/redis_lock.py +0 -0
  116. {supertable-2.3.3 → supertable-2.3.5}/supertable/locking/tests/__init__.py +0 -0
  117. {supertable-2.3.3 → supertable-2.3.5}/supertable/locking/tests/test_file_lock.py +0 -0
  118. {supertable-2.3.3 → supertable-2.3.5}/supertable/locking/tests/test_redis_lock.py +0 -0
  119. {supertable-2.3.3 → supertable-2.3.5}/supertable/logging.py +0 -0
  120. {supertable-2.3.3 → supertable-2.3.5}/supertable/meta_reader.py +0 -0
  121. {supertable-2.3.3 → supertable-2.3.5}/supertable/mirroring/__init__.py +0 -0
  122. {supertable-2.3.3 → supertable-2.3.5}/supertable/mirroring/mirror_delta.py +0 -0
  123. {supertable-2.3.3 → supertable-2.3.5}/supertable/mirroring/mirror_formats.py +0 -0
  124. {supertable-2.3.3 → supertable-2.3.5}/supertable/mirroring/mirror_iceberg.py +0 -0
  125. {supertable-2.3.3 → supertable-2.3.5}/supertable/mirroring/mirror_parquet.py +0 -0
  126. {supertable-2.3.3 → supertable-2.3.5}/supertable/monitoring/__init__.py +0 -0
  127. {supertable-2.3.3 → supertable-2.3.5}/supertable/monitoring/partitions.py +0 -0
  128. {supertable-2.3.3 → supertable-2.3.5}/supertable/monitoring_writer.py +0 -0
  129. {supertable-2.3.3 → supertable-2.3.5}/supertable/plan_extender.py +0 -0
  130. {supertable-2.3.3 → supertable-2.3.5}/supertable/query_plan_manager.py +0 -0
  131. {supertable-2.3.3 → supertable-2.3.5}/supertable/rbac/__init__.py +0 -0
  132. {supertable-2.3.3 → supertable-2.3.5}/supertable/rbac/access_control.py +0 -0
  133. {supertable-2.3.3 → supertable-2.3.5}/supertable/rbac/filter_builder.py +0 -0
  134. {supertable-2.3.3 → supertable-2.3.5}/supertable/rbac/permissions.py +0 -0
  135. {supertable-2.3.3 → supertable-2.3.5}/supertable/rbac/role_manager.py +0 -0
  136. {supertable-2.3.3 → supertable-2.3.5}/supertable/rbac/row_column_security.py +0 -0
  137. {supertable-2.3.3 → supertable-2.3.5}/supertable/rbac/tests/test_filter_builder.py +0 -0
  138. {supertable-2.3.3 → supertable-2.3.5}/supertable/rbac/tests/test_rbac.py +0 -0
  139. {supertable-2.3.3 → supertable-2.3.5}/supertable/rbac/tests/test_rbac_per_table.py +0 -0
  140. {supertable-2.3.3 → supertable-2.3.5}/supertable/rbac/user_manager.py +0 -0
  141. {supertable-2.3.3 → supertable-2.3.5}/supertable/redis_catalog.py +0 -0
  142. {supertable-2.3.3 → supertable-2.3.5}/supertable/redis_connector.py +0 -0
  143. {supertable-2.3.3 → supertable-2.3.5}/supertable/redis_infra.py +0 -0
  144. {supertable-2.3.3 → supertable-2.3.5}/supertable/redis_keys.py +0 -0
  145. {supertable-2.3.3 → supertable-2.3.5}/supertable/staging_area.py +0 -0
  146. {supertable-2.3.3 → supertable-2.3.5}/supertable/storage/__init__.py +0 -0
  147. {supertable-2.3.3 → supertable-2.3.5}/supertable/storage/storage_factory.py +0 -0
  148. {supertable-2.3.3 → supertable-2.3.5}/supertable/storage/tests/test_storage.py +0 -0
  149. {supertable-2.3.3 → supertable-2.3.5}/supertable/super_pipe.py +0 -0
  150. {supertable-2.3.3 → supertable-2.3.5}/supertable/system_query.py +0 -0
  151. {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/__init__.py +0 -0
  152. {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/test_align_to_schema_fix.py +0 -0
  153. {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/test_create_if_missing.py +0 -0
  154. {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/test_data_reader_preflight.py +0 -0
  155. {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/test_data_writer.py +0 -0
  156. {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/test_data_writer_compact.py +0 -0
  157. {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/test_errors.py +0 -0
  158. {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/test_meta_reader.py +0 -0
  159. {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/test_monitoring_partitions.py +0 -0
  160. {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/test_monitoring_sink_guard.py +0 -0
  161. {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/test_newer_than.py +0 -0
  162. {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/test_parquet_statistics.py +0 -0
  163. {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/test_processing.py +0 -0
  164. {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/test_processing_compact_resources.py +0 -0
  165. {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/test_query_sql.py +0 -0
  166. {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/test_read_pruning_differential.py +0 -0
  167. {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/test_read_pruning_integration.py +0 -0
  168. {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/test_redis_key_prefix.py +0 -0
  169. {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/test_resolve_overwrite_writes.py +0 -0
  170. {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/test_simple_table.py +0 -0
  171. {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/test_stats_cache.py +0 -0
  172. {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/test_stats_pruning.py +0 -0
  173. {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/test_stats_schema_snapshot.py +0 -0
  174. {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/test_super_table.py +0 -0
  175. {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/test_supertable_all.py +0 -0
  176. {supertable-2.3.3 → supertable-2.3.5}/supertable/tests/test_system_query.py +0 -0
  177. {supertable-2.3.3 → supertable-2.3.5}/supertable/utils/__init__.py +0 -0
  178. {supertable-2.3.3 → supertable-2.3.5}/supertable/utils/helper.py +0 -0
  179. {supertable-2.3.3 → supertable-2.3.5}/supertable/utils/profiler.py +0 -0
  180. {supertable-2.3.3 → supertable-2.3.5}/supertable/utils/sql_parser.py +0 -0
  181. {supertable-2.3.3 → supertable-2.3.5}/supertable/utils/tests/test_sql_parser_columns.py +0 -0
  182. {supertable-2.3.3 → supertable-2.3.5}/supertable/utils/timer.py +0 -0
  183. {supertable-2.3.3 → supertable-2.3.5}/supertable.egg-info/SOURCES.txt +0 -0
  184. {supertable-2.3.3 → supertable-2.3.5}/supertable.egg-info/dependency_links.txt +0 -0
  185. {supertable-2.3.3 → supertable-2.3.5}/supertable.egg-info/entry_points.txt +0 -0
  186. {supertable-2.3.3 → supertable-2.3.5}/supertable.egg-info/requires.txt +0 -0
  187. {supertable-2.3.3 → supertable-2.3.5}/supertable.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: supertable
3
- Version: 2.3.3
3
+ Version: 2.3.5
4
4
  Summary: SuperTable — versioned data lake library for SQL analytics on Parquet + Redis.
5
5
  Author: Levente Kupas
6
6
  Author-email: Levente Kupas <lkupas@kladnasoft.com>
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "supertable"
7
- version = "2.3.3"
7
+ version = "2.3.5"
8
8
  description = "SuperTable — versioned data lake library for SQL analytics on Parquet + Redis."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -19,7 +19,7 @@ long_description = readme.read_text(encoding="utf-8") if readme.exists() else ""
19
19
 
20
20
  setup(
21
21
  name="supertable",
22
- version="2.3.3",
22
+ version="2.3.5",
23
23
  description="SuperTable — versioned data lake library for SQL analytics on Parquet + Redis.",
24
24
  long_description=long_description,
25
25
  long_description_content_type="text/markdown",
@@ -25,7 +25,7 @@ See the ``supertable.demo`` package for runnable end-to-end demos and the
25
25
  project documentation for the full API surface.
26
26
  """
27
27
 
28
- __version__ = "2.3.3"
28
+ __version__ = "2.3.5"
29
29
 
30
30
  # Re-export the core public surface so users can do ``from supertable import …``
31
31
  # instead of remembering submodule paths.
@@ -17,6 +17,12 @@ handler.setFormatter(colorlog.ColoredFormatter(
17
17
  logging.basicConfig(level=logging.INFO, handlers=[handler])
18
18
  logger = logging.getLogger(__name__)
19
19
 
20
+ # Quiet noisy third-party HTTP client loggers. At DEBUG these emit one line
21
+ # per request (connection setup + every HEAD/GET/PUT), which drowns out
22
+ # SuperTable's own logs. WARNING keeps genuine connection problems visible.
23
+ for _noisy_logger in ("urllib3", "botocore", "boto3", "s3transfer", "boto"):
24
+ logging.getLogger(_noisy_logger).setLevel(logging.WARNING)
25
+
20
26
  _VALID_LOG_LEVELS = frozenset({"DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"})
21
27
 
22
28
  @dataclass(slots=True)
@@ -0,0 +1,96 @@
1
+ import os
2
+ import sys
3
+ import tempfile
4
+
5
+ from supertable.config.settings import settings
6
+ from supertable.config.defaults import logger
7
+
8
+ # If this file is located in a subdirectory, adjust the path logic as needed.
9
+ # Currently this appends the parent of this file's directory (dirname(__file__)/..) to sys.path.
10
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
11
+
12
+ # ---------- lazy home directory resolution ----------
13
+ _resolved_home: str | None = None
14
+
15
+ def _is_writable_dir(path: str) -> bool:
16
+ """Create *path* if needed and verify we can actually write a file in it.
17
+
18
+ ``os.access(..., W_OK)`` is unreliable under containers, ACLs and
19
+ root-squashed mounts, so probe with a real create+unlink: this is the
20
+ difference between a home that merely *resolves* and one DuckDB can root
21
+ its temp/spill, cache and extension dirs under.
22
+ """
23
+ try:
24
+ os.makedirs(path, exist_ok=True)
25
+ with tempfile.NamedTemporaryFile(dir=path):
26
+ pass
27
+ return True
28
+ except OSError:
29
+ return False
30
+
31
+ def _resolve_app_home() -> str:
32
+ """
33
+ Resolve, expand, and normalise the application home directory once.
34
+
35
+ The home must be *writable*, not just resolvable: DuckDB roots its
36
+ temp/spill, external-cache and extension directories here, so a
37
+ non-writable home silently breaks every write (the probe fails with
38
+ ``errno 13`` and falls back to the slow full-read path). We therefore
39
+ verify writability and, when the configured home is not usable, fall back
40
+ to ``<tempdir>/supertable`` with a loud warning rather than returning a
41
+ path that only looks valid.
42
+ """
43
+ global _resolved_home
44
+ if _resolved_home is not None:
45
+ return _resolved_home
46
+
47
+ raw = settings.SUPERTABLE_HOME
48
+ expanded = os.path.abspath(os.path.expanduser(raw))
49
+
50
+ if _is_writable_dir(expanded):
51
+ logger.debug(f"Ensured app home directory exists: {expanded}")
52
+ _resolved_home = expanded
53
+ return _resolved_home
54
+
55
+ fallback = os.path.join(tempfile.gettempdir(), "supertable")
56
+ if _is_writable_dir(fallback):
57
+ logger.warning(
58
+ f"SUPERTABLE_HOME={expanded!r} is not writable; falling back to "
59
+ f"{fallback!r}. Set SUPERTABLE_HOME to a writable directory to "
60
+ f"silence this — DuckDB temp/spill, cache and extensions live under it."
61
+ )
62
+ _resolved_home = fallback
63
+ return _resolved_home
64
+
65
+ raise RuntimeError(
66
+ f"No writable application home: tried SUPERTABLE_HOME={expanded!r} and "
67
+ f"fallback {fallback!r}. Set SUPERTABLE_HOME to a writable directory."
68
+ )
69
+
70
+ def change_to_app_home(home_dir: str | None = None) -> None:
71
+ """
72
+ Attempts to change the current working directory to `home_dir`.
73
+ If home_dir is not provided, uses the resolved app home.
74
+ Logs the outcome.
75
+ """
76
+ target = home_dir if home_dir else _resolve_app_home()
77
+ expanded_dir = os.path.expanduser(target)
78
+ try:
79
+ os.chdir(expanded_dir)
80
+ logger.debug(f"Changed working directory to {expanded_dir}")
81
+ except Exception as e:
82
+ logger.error(f"Failed to change working directory to {expanded_dir}: {e}")
83
+
84
+ # ---------- eager init (preserves original import-time behaviour) ----------
85
+ _app_home = _resolve_app_home()
86
+ change_to_app_home(_app_home)
87
+ logger.debug(f"Current working directory: {os.getcwd()}")
88
+
89
+ # ---------- public API ----------
90
+
91
+ # Kept for backward compatibility; prefer get_app_home() for the expanded path.
92
+ app_home = _app_home
93
+
94
+ def get_app_home() -> str:
95
+ """Return the fully expanded, absolute application home directory."""
96
+ return _resolve_app_home()
@@ -157,6 +157,25 @@ class Settings:
157
157
  SUPERTABLE_DUCKDB_MATERIALIZE: str = "view" # SUPERTABLE_DUCKDB_MATERIALIZE
158
158
  SUPERTABLE_DUCKDB_PRESIGNED: bool = False # SUPERTABLE_DUCKDB_PRESIGNED
159
159
  SUPERTABLE_DUCKDB_USE_HTTPFS: bool = False # SUPERTABLE_DUCKDB_USE_HTTPFS
160
+ # Deletion-vector (tombstone) table cache. Each entry is a small
161
+ # `DISTINCT __rowid__` table keyed by the stable tombstone path; the
162
+ # tombstone view ANTI JOINs it instead of re-reading the parquet every
163
+ # query. Eviction is purely per-table — a churny table can never evict a
164
+ # slow table's cached deletion-vector:
165
+ # * Idle TTL (below): every entry, including a table's latest, is dropped
166
+ # once it goes unqueried for the TTL window.
167
+ # * Per-table cap (this knob): at most N most-recently-used versions are
168
+ # kept per table, so a burst of rewrites (e.g. 1000 updates in 5 min)
169
+ # retains only the last N rather than all of them.
170
+ # <= 0 disables the cache entirely (inline read_parquet fallback).
171
+ SUPERTABLE_DUCKDB_TOMBSTONE_CACHE_MAX_PER_TABLE: int = 8 # SUPERTABLE_DUCKDB_TOMBSTONE_CACHE_MAX_PER_TABLE
172
+ # Idle TTL (seconds): a cached deletion-vector is dropped once it has gone
173
+ # unqueried for this long; every query that uses it refreshes the timer.
174
+ # Applies to every entry (a table's latest included), so an abandoned table
175
+ # reclaims its cache instead of lingering until the connection resets.
176
+ # <= 0 keeps an entry only while a query references it (no persistence).
177
+ # Defaults to a fixed 300 s (5 min), the same value as the SUPERTABLE_ENGINE_FRESHNESS_SEC default; it is not derived from that setting.
178
+ SUPERTABLE_DUCKDB_TOMBSTONE_CACHE_TTL_SEC: int = 300 # SUPERTABLE_DUCKDB_TOMBSTONE_CACHE_TTL_SEC
160
179
  SUPERTABLE_DEBUG_TIMINGS: bool = False # SUPERTABLE_DEBUG_TIMINGS
161
180
 
162
181
  # ── Engine Routing / Executor ────────────────────────────────────
@@ -418,6 +437,8 @@ def _build_settings() -> Settings:
418
437
  SUPERTABLE_DUCKDB_MATERIALIZE=_env_str("SUPERTABLE_DUCKDB_MATERIALIZE", "view"),
419
438
  SUPERTABLE_DUCKDB_PRESIGNED=_env_bool("SUPERTABLE_DUCKDB_PRESIGNED", False),
420
439
  SUPERTABLE_DUCKDB_USE_HTTPFS=_env_bool("SUPERTABLE_DUCKDB_USE_HTTPFS", False),
440
+ SUPERTABLE_DUCKDB_TOMBSTONE_CACHE_MAX_PER_TABLE=_env_int("SUPERTABLE_DUCKDB_TOMBSTONE_CACHE_MAX_PER_TABLE", 8),
441
+ SUPERTABLE_DUCKDB_TOMBSTONE_CACHE_TTL_SEC=_env_int("SUPERTABLE_DUCKDB_TOMBSTONE_CACHE_TTL_SEC", 300),
421
442
  SUPERTABLE_DEBUG_TIMINGS=_env_bool("SUPERTABLE_DEBUG_TIMINGS", False),
422
443
 
423
444
  # ── Engine Routing ───────────────────────────────────────────
@@ -61,9 +61,17 @@ class TombstoneDef:
61
61
 
62
62
  - tombstone_path: storage path of the deletion-vector parquet
63
63
  (columns ``__file__`` + ``__rowid__``). ``None`` means no
64
- tombstone exists, so no anti-join is applied.
64
+ tombstone exists, so no anti-join is applied. This may be a
65
+ presigned/object-store URL that rotates per request, so it is
66
+ *not* stable enough to use as a cache key.
67
+ - cache_key: the bare, stable storage key of the same deletion-vector
68
+ parquet (no presign). Stable across pure appends (carry-forward
69
+ returns the previous tombstone), so DuckDB engines use it to key the
70
+ materialised deletion-vector table cache. ``None`` disables caching
71
+ for this alias (falls back to inline ``read_parquet``).
65
72
  """
66
73
  tombstone_path: Optional[str] = None
74
+ cache_key: Optional[str] = None
67
75
 
68
76
 
69
77
  @dataclass
@@ -322,6 +322,9 @@ class DataReader:
322
322
  # storage returns the key unchanged.
323
323
  reflection.tombstone_views[td.alias] = TombstoneDef(
324
324
  tombstone_path=estimator._to_duckdb_path(tomb_path),
325
+ # Bare key (pre-presign) is stable across
326
+ # appends → safe deletion-vector cache key.
327
+ cache_key=tomb_path,
325
328
  )
326
329
  except Exception as te:
327
330
  logger.debug(self._lp(f"[tombstone] leaf lookup failed for {td.alias}: {te}"))
@@ -26,6 +26,7 @@ from supertable.processing import (
26
26
  resolve_overwrite_writes,
27
27
  identify_all_rowids,
28
28
  build_tombstone_file,
29
+ reclaim_fully_dead_files,
29
30
  build_stats_file,
30
31
  extract_stats_rows,
31
32
  probe_ranges_from_df,
@@ -370,6 +371,14 @@ class DataWriter:
370
371
  profiler=profiler,
371
372
  )
372
373
  mark("overlap")
374
+ if overwrite_columns:
375
+ _snap_files = len(last_simple_table.get("resources") or [])
376
+ _cand = sum(1 for _, ov, _ in overlapping_files if ov)
377
+ logger.debug(lp(
378
+ f"step[overlap]: {_cand}/{_snap_files} existing file(s) are overwrite "
379
+ f"candidates on {overwrite_columns} "
380
+ f"(snapshot has no per-file key stats → every file is suspect)"
381
+ ))
373
382
 
374
383
  # --- Stats-driven file pruning (consumer 5a) ----------------------
375
384
  # Narrow the overwrite/delete candidate set using the external stats
@@ -386,6 +395,10 @@ class DataWriter:
386
395
  stored_stats_df = load_stats(stats_file, allow_cache=True, profiler=profiler)
387
396
  if stored_stats_df is not None and stored_stats_df.height > 0:
388
397
  probe = probe_ranges_from_df(dataframe, overwrite_columns)
398
+ _probe_desc = {
399
+ c: (f"{v[0]}[{v[1]}..{v[2]}]" if v else "unconstrained(null/unsupported)")
400
+ for c, v in probe.items()
401
+ }
389
402
  before = len(overlapping_files)
390
403
  overlapping_files = prune_overlapping_files_by_stats(
391
404
  overlapping_files,
@@ -394,8 +407,21 @@ class DataWriter:
394
407
  profiler=profiler,
395
408
  )
396
409
  pruned = before - len(overlapping_files)
410
+ logger.debug(lp(
411
+ f"step[stats-prune]: df-probe {_probe_desc} vs {stored_stats_df.height} "
412
+ f"stored stat row(s) → kept {len(overlapping_files)}/{before}, "
413
+ f"pruned {pruned} (no data file opened)"
414
+ ))
397
415
  if pruned > 0:
398
416
  logger.info(lp(f"stats pruning: skipped {pruned}/{before} candidate files"))
417
+ else:
418
+ logger.debug(lp(
419
+ "step[stats-prune]: stats artifact empty → no pruning, all candidates retained"
420
+ ))
421
+ else:
422
+ logger.debug(lp(
423
+ "step[stats-prune]: snapshot has no stats_file → no pruning, all candidates retained"
424
+ ))
399
425
  mark("stats_prune")
400
426
 
401
427
  # File cache: used only by delete_only's identify_all_rowids below.
@@ -419,6 +445,16 @@ class DataWriter:
419
445
  newer_than_col=newer_than,
420
446
  profiler=profiler,
421
447
  )
448
+ mark("resolve_overwrite")
449
+ _counts = profiler.counts
450
+ _fallback = bool(_counts.get("overwrite_resolve_fallback"))
451
+ logger.debug(lp(
452
+ f"step[probe-resolve] via {'polars-fallback' if _fallback else 'duckdb-pushdown'}: "
453
+ f"matched {_counts.get('probe_rows_matched', _counts.get('delete_rows_matched', 0))} "
454
+ f"existing row(s) on {overwrite_columns} → "
455
+ f"{len(resolved_delete_pairs or [])} (file,__rowid__) delete pair(s); "
456
+ f"{dataframe.height}/{pre_filter_count} incoming row(s) survive"
457
+ ))
422
458
  if newer_than:
423
459
  skipped = pre_filter_count - dataframe.height
424
460
  if skipped > 0:
@@ -472,6 +508,17 @@ class DataWriter:
472
508
  new_resources = []
473
509
  sunset_files = set()
474
510
 
511
+ # Load the current deletion-vector once: used both to exclude
512
+ # already-tombstoned rows from this write's deletes (below) and,
513
+ # via prev_df, to extend the vector without a second read.
514
+ prev_dv_df = (
515
+ _read_parquet_safe(prev_tombstone_path, profiler=profiler)
516
+ if prev_tombstone_path else None
517
+ )
518
+ prev_dv_rowids = set()
519
+ if prev_dv_df is not None and "__rowid__" in prev_dv_df.columns:
520
+ prev_dv_rowids = set(prev_dv_df.get_column("__rowid__").to_list())
521
+
475
522
  # 1. Identify which existing rows this write deletes/replaces.
476
523
  # overwrite_columns drives the anti-join key (delete + upsert);
477
524
  # pure appends (no overwrite_columns) tombstone nothing. The
@@ -487,8 +534,26 @@ class DataWriter:
487
534
  file_cache=file_cache,
488
535
  profiler=profiler,
489
536
  )
537
+
538
+ # Never re-tombstone rows already in the deletion-vector. The
539
+ # overlap probe (and identify_all_rowids) scan the *physical*
540
+ # files, which still hold logically-deleted rows until
541
+ # compaction; without this filter every write re-counts those
542
+ # already-dead rows — inflating ``deleted`` and forcing a
543
+ # needless tombstone rewrite even when nothing live was removed.
544
+ # Excluding them makes ``deleted`` the true count of live rows
545
+ # removed and lets unchanged writes carry the vector forward.
546
+ if new_delete_pairs and prev_dv_rowids:
547
+ new_delete_pairs = [
548
+ (f, rid) for (f, rid) in new_delete_pairs
549
+ if rid not in prev_dv_rowids
550
+ ]
490
551
  deleted = len(new_delete_pairs)
491
552
  mark("identify_deletes")
553
+ logger.debug(lp(
554
+ f"step[deletes]: tombstoning {deleted} live row(s) this write "
555
+ f"(excluded {len(prev_dv_rowids)} row(s) already in the deletion-vector)"
556
+ ))
492
557
 
493
558
  # 2. Write the incoming rows as a new file (insert/upsert side).
494
559
  # delete_only carries only predicate columns — nothing to insert.
@@ -505,6 +570,10 @@ class DataWriter:
505
570
  else:
506
571
  inserted = 0
507
572
  mark("write_parquet")
573
+ logger.debug(lp(
574
+ f"step[write]: appended {inserted} incoming row(s) as {len(new_resources)} "
575
+ f"new immutable file(s) (no existing data file rewritten)"
576
+ ))
508
577
 
509
578
  # 3. Carry forward + extend the deletion-vector tombstone file.
510
579
  # No new deletes → reuse the previous file (combined_df=None).
@@ -515,6 +584,7 @@ class DataWriter:
515
584
  new_pairs=new_delete_pairs,
516
585
  compression_level=compression_level,
517
586
  profiler=profiler,
587
+ prev_df=prev_dv_df,
518
588
  )
519
589
 
520
590
  # Track the live deletion-vector row count so meta reads can
@@ -527,6 +597,42 @@ class DataWriter:
527
597
  else int(last_simple_table.get("tombstone_rows", 0) or 0)
528
598
  )
529
599
  mark("build_tombstone")
600
+ logger.debug(lp(
601
+ f"step[tombstone]: deletion-vector now {tombstone_rows} row(s) "
602
+ f"({'rewritten' if combined_tombstone_df is not None else 'carried forward unchanged'})"
603
+ ))
604
+
605
+ # 3b. Eager reclamation of fully-dead files. Any existing data
606
+ # file whose every physical row is now tombstoned is 100%
607
+ # dead: drop it from the snapshot for free (no rewrite) and
608
+ # remove its rowids from the vector. Without this, fully
609
+ # deleted files linger until the compaction threshold,
610
+ # bloating the snapshot and getting re-scanned by every later
611
+ # overwrite probe. Only runs when the vector changed this
612
+ # write (combined_tombstone_df is not None) — a carry-forward
613
+ # can create no newly-dead file.
614
+ if combined_tombstone_df is not None:
615
+ reclaimed_files, reclaimed_tomb_path, reclaimed_dv = (
616
+ reclaim_fully_dead_files(
617
+ resources=last_simple_table.get("resources") or [],
618
+ combined_dv=combined_tombstone_df,
619
+ tombstone_dir=tombstone_dir,
620
+ compression_level=compression_level,
621
+ profiler=profiler,
622
+ )
623
+ )
624
+ if reclaimed_files:
625
+ sunset_files |= reclaimed_files
626
+ tombstone_path = reclaimed_tomb_path
627
+ combined_tombstone_df = reclaimed_dv
628
+ tombstone_rows = (
629
+ reclaimed_dv.height if reclaimed_dv is not None else 0
630
+ )
631
+ logger.info(lp(
632
+ f"reclaimed {len(reclaimed_files)} fully-deleted "
633
+ f"file(s); deletion-vector now {tombstone_rows} rows"
634
+ ))
635
+ mark("reclaim_dead_files")
530
636
 
531
637
  # 4. Threshold compaction (two triggers, same physical step):
532
638
  # (a) the deletion-vector grew past max_tombstone_rows, or
@@ -538,7 +644,9 @@ class DataWriter:
538
644
  # rows (hidden on read, never reclaimable). Draining first
539
645
  # guarantees Phase B only ever sees vector-free survivors.
540
646
  post_write_resources = (
541
- (last_simple_table.get("resources") or []) + new_resources
647
+ [r for r in (last_simple_table.get("resources") or [])
648
+ if r.get("file") not in sunset_files]
649
+ + new_resources
542
650
  )
543
651
  compaction_gate = should_compact_small_files(
544
652
  post_write_resources, table_config
@@ -554,9 +662,17 @@ class DataWriter:
554
662
  if tombstone_threshold_hit or compaction_gate:
555
663
  dv_to_drain = combined_tombstone_df
556
664
  if dv_to_drain is None and tombstone_path:
557
- # Pure carry-forward: load the live vector so the merge
558
- # below never sunsets a file it still references.
559
- dv_to_drain = _read_parquet_safe(tombstone_path, profiler=profiler)
665
+ # Pure carry-forward: the pointer is unchanged, so the
666
+ # live vector is exactly the one already loaded at the
667
+ # top of this block — reuse it instead of a second
668
+ # storage read (fall back to reading it only if it wasn't
669
+ # loaded, which shouldn't happen when tombstone_path is
670
+ # set, but stays correct if it ever does).
671
+ dv_to_drain = (
672
+ prev_dv_df
673
+ if prev_dv_df is not None
674
+ else _read_parquet_safe(tombstone_path, profiler=profiler)
675
+ )
560
676
  if dv_to_drain is not None and dv_to_drain.height > 0:
561
677
  removed, tomb_new, tomb_sunset = compact_tombstones(
562
678
  snapshot=last_simple_table,
@@ -743,7 +859,7 @@ class DataWriter:
743
859
  schema_json = "{}"
744
860
  _org, _sup = self.super_table.organization, self.super_table.super_name
745
861
  self.catalog.r.set(RK.schema(_org, _sup, simple_name), schema_json)
746
- self.catalog.r.sadd(RK.table_names(_org, _sup), simple_name)
862
+ self.catalog.r.sadd(RK.meta_table_names(_org, _sup), simple_name)
747
863
  except Exception as e:
748
864
  logger.debug(f"[data-writer] schema/table_names Redis write failed: {e}")
749
865
 
@@ -793,7 +909,7 @@ class DataWriter:
793
909
  f"total={total_duration:.3f} | "
794
910
  f"convert={timings.get('convert', 0):.3f} | dedup_ts={timings.get('dedup_ts', 0):.3f} | validate={timings.get('validate', 0):.3f} | "
795
911
  f"lock={timings.get('lock', 0):.3f} | snapshot={timings.get('snapshot', 0):.3f} | "
796
- f"overlap={timings.get('overlap', 0):.3f} | stats_prune={timings.get('stats_prune', 0):.3f} | newer_than={timings.get('newer_than', 0):.3f} | "
912
+ f"overlap={timings.get('overlap', 0):.3f} | stats_prune={timings.get('stats_prune', 0):.3f} | resolve_overwrite={timings.get('resolve_overwrite', 0):.3f} | newer_than={timings.get('newer_than', 0):.3f} | "
797
913
  f"identify_deletes={timings.get('identify_deletes', 0):.3f} | write_parquet={timings.get('write_parquet', 0):.3f} | "
798
914
  f"build_tombstone={timings.get('build_tombstone', 0):.3f} | compact_tombstones={timings.get('compact_tombstones', 0):.3f} | compact_small={timings.get('compact_small', 0):.3f} | build_stats={timings.get('build_stats', 0):.3f} | "
799
915
  f"update_simple={timings.get('update_simple', 0):.3f} | bump_root={timings.get('bump_root', 0):.3f} | "
@@ -14,7 +14,9 @@ from enum import Enum
14
14
 
15
15
  from supertable.config import defaults
16
16
 
17
- logging.getLogger("supertable").setLevel(logging.INFO)
17
+ # Follow the configured SUPERTABLE_LOG_LEVEL (resolved in supertable.config.defaults)
18
+ # instead of hard-pinning INFO, so DEBUG surfaces the detailed write step[...] logs.
19
+ logging.getLogger("supertable").setLevel(defaults.default.LOG_LEVEL)
18
20
 
19
21
  defaults.default.IS_SHOW_TIMING = True
20
22
 
@@ -10,6 +10,7 @@ import duckdb
10
10
  import pandas as pd
11
11
 
12
12
  from supertable.config.defaults import logger
13
+ from supertable.config.settings import settings
13
14
  from supertable.query_plan_manager import QueryPlanManager
14
15
  from supertable.utils.sql_parser import SQLParser
15
16
  from supertable.data_classes import Reflection
@@ -24,6 +25,7 @@ from supertable.engine.engine_common import (
24
25
  apply_runtime_pragmas,
25
26
  create_rbac_view,
26
27
  create_tombstone_view,
28
+ TombstoneCache,
27
29
  )
28
30
 
29
31
 
@@ -56,6 +58,13 @@ class DuckDBLite:
56
58
  self._lock = threading.Lock()
57
59
  self._con: Optional[duckdb.DuckDBPyConnection] = None
58
60
  self._httpfs_configured = False
61
+ # Shared deletion-vector table cache: per-table eviction (idle TTL +
62
+ # per-table version cap), bounded by config. Tables live on the
63
+ # persistent connection and are forgotten when it resets.
64
+ self._tombstone_cache = TombstoneCache(
65
+ settings.SUPERTABLE_DUCKDB_TOMBSTONE_CACHE_MAX_PER_TABLE,
66
+ settings.SUPERTABLE_DUCKDB_TOMBSTONE_CACHE_TTL_SEC,
67
+ )
59
68
 
60
69
  # ------------------------------------------------------------------
61
70
  # Connection lifecycle
@@ -92,6 +101,8 @@ class DuckDBLite:
92
101
  pass
93
102
  self._con = None
94
103
  self._httpfs_configured = False
104
+ # DV tables died with the connection — just forget the registry.
105
+ self._tombstone_cache.clear_registry()
95
106
  logger.warning("[duckdb.lite] connection reset")
96
107
 
97
108
  # ------------------------------------------------------------------
@@ -163,6 +174,8 @@ class DuckDBLite:
163
174
 
164
175
  # Create per-query VIEWs. Dropped in finally regardless of outcome.
165
176
  created_views: List[str] = []
177
+ # Deletion-vector cache keys acquired this query — released in finally.
178
+ acquired_dv_keys: List[str] = []
166
179
  try:
167
180
  for alias, table_name in alias_to_table_name.items():
168
181
  files = alias_to_files[alias]
@@ -193,7 +206,15 @@ class DuckDBLite:
193
206
  source = query_alias_to_name[alias]
194
207
  tomb_def = tombstone_views.get(alias)
195
208
  view = f"tomb_{source}_{query_suffix}"
196
- create_tombstone_view(con, source, view, tomb_def)
209
+ # Reuse a materialised deletion-vector table when the cache is
210
+ # enabled and the alias has a stable key; otherwise the call
211
+ # falls back to the inline read_parquet path (dv_table=None).
212
+ cache_key = getattr(tomb_def, "cache_key", None) if tomb_def else None
213
+ tomb_path = getattr(tomb_def, "tombstone_path", None) if tomb_def else None
214
+ dv_table = self._tombstone_cache.acquire(con, cache_key, tomb_path)
215
+ if dv_table:
216
+ acquired_dv_keys.append(cache_key)
217
+ create_tombstone_view(con, source, view, tomb_def, dv_table=dv_table)
197
218
  created_views.append(view)
198
219
  query_alias_to_name[alias] = view
199
220
 
@@ -260,4 +281,11 @@ class DuckDBLite:
260
281
  con.execute(f"DROP VIEW IF EXISTS {view};")
261
282
  except Exception:
262
283
  pass
284
+ # Release deletion-vector refs now the views referencing them are
285
+ # gone; this may evict + DROP unreferenced DV tables over capacity.
286
+ for cache_key in acquired_dv_keys:
287
+ try:
288
+ self._tombstone_cache.release(con, cache_key)
289
+ except Exception:
290
+ pass
263
291
 
@@ -29,6 +29,7 @@ from supertable.engine.engine_common import (
29
29
  create_rbac_view,
30
30
  create_tombstone_view,
31
31
  rbac_view_name,
32
+ TombstoneCache,
32
33
  )
33
34
 
34
35
 
@@ -80,6 +81,14 @@ class DuckDBPro:
80
81
  # Multiple entries per key when old version still has in-flight queries.
81
82
  self._registry: Dict[Tuple[str, str], List[_ProCacheEntry]] = {}
82
83
 
84
+ # Shared deletion-vector table cache: per-table eviction (idle TTL +
85
+ # per-table version cap), bounded by config. Tables live on the
86
+ # persistent connection and are forgotten when it resets.
87
+ self._tombstone_cache = TombstoneCache(
88
+ settings.SUPERTABLE_DUCKDB_TOMBSTONE_CACHE_MAX_PER_TABLE,
89
+ settings.SUPERTABLE_DUCKDB_TOMBSTONE_CACHE_TTL_SEC,
90
+ )
91
+
83
92
  # Temp dir for spill — set on first query
84
93
  self._temp_dir: Optional[str] = None
85
94
 
@@ -123,6 +132,8 @@ class DuckDBPro:
123
132
  self._con = None
124
133
  self._httpfs_configured = False
125
134
  self._registry.clear()
135
+ # DV tables died with the connection — just forget the registry.
136
+ self._tombstone_cache.clear_registry()
126
137
  logger.warning("[duckdb.pro] connection reset — all cached views lost")
127
138
 
128
139
  # ---------------------------------------------------------
@@ -313,6 +324,8 @@ class DuckDBPro:
313
324
  # assignments are reached (which would cause a NameError otherwise).
314
325
  rbac_view_names: List[str] = []
315
326
  tombstone_view_names: List[str] = []
327
+ # Deletion-vector cache keys acquired this query — released in finally.
328
+ acquired_dv_keys: List[str] = []
316
329
  try:
317
330
  query_alias_to_name = dict(alias_to_table_name)
318
331
  # Per-query suffix so concurrent queries never collide on a shared
@@ -328,8 +341,18 @@ class DuckDBPro:
328
341
  source = query_alias_to_name[alias]
329
342
  tomb_def = tombstone_views.get(alias)
330
343
  view = f"tomb_{source}_{query_suffix}"
344
+ # Reuse a materialised deletion-vector table when the cache is
345
+ # enabled and the alias has a stable key; otherwise fall back to
346
+ # the inline read_parquet path (dv_table=None). All DDL — the DV
347
+ # CREATE TABLE inside acquire() and the view creation — runs
348
+ # under the connection lock, matching Pro's serialised model.
349
+ cache_key = getattr(tomb_def, "cache_key", None) if tomb_def else None
350
+ tomb_path = getattr(tomb_def, "tombstone_path", None) if tomb_def else None
331
351
  with self._lock:
332
- create_tombstone_view(con, source, view, tomb_def)
352
+ dv_table = self._tombstone_cache.acquire(con, cache_key, tomb_path)
353
+ if dv_table:
354
+ acquired_dv_keys.append(cache_key)
355
+ create_tombstone_view(con, source, view, tomb_def, dv_table=dv_table)
333
356
  tombstone_view_names.append(view)
334
357
  query_alias_to_name[alias] = view
335
358
 
@@ -395,6 +418,16 @@ class DuckDBPro:
395
418
  except Exception:
396
419
  pass
397
420
 
421
+ # Release deletion-vector refs now their views are gone; this may
422
+ # evict + DROP unreferenced DV tables over capacity.
423
+ if acquired_dv_keys:
424
+ with self._lock:
425
+ for cache_key in acquired_dv_keys:
426
+ try:
427
+ self._tombstone_cache.release(con, cache_key)
428
+ except Exception:
429
+ pass
430
+
398
431
  # Release refs and drop stale tables
399
432
  with self._lock:
400
433
  self._release_refs(tables_used)