supertable 2.3.6__tar.gz → 2.3.7__tar.gz

This diff compares the contents of two publicly available package versions, each released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
Files changed (187)
  1. {supertable-2.3.6/supertable.egg-info → supertable-2.3.7}/PKG-INFO +1 -1
  2. {supertable-2.3.6 → supertable-2.3.7}/pyproject.toml +1 -1
  3. {supertable-2.3.6 → supertable-2.3.7}/setup.py +1 -1
  4. {supertable-2.3.6 → supertable-2.3.7}/supertable/__init__.py +1 -1
  5. {supertable-2.3.6 → supertable-2.3.7}/supertable/config/settings.py +8 -0
  6. {supertable-2.3.6 → supertable-2.3.7}/supertable/processing.py +15 -7
  7. {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/test_resolve_overwrite_writes.py +16 -0
  8. supertable-2.3.7/supertable/tests/test_write_probe_gate.py +130 -0
  9. {supertable-2.3.6 → supertable-2.3.7/supertable.egg-info}/PKG-INFO +1 -1
  10. {supertable-2.3.6 → supertable-2.3.7}/supertable.egg-info/SOURCES.txt +1 -0
  11. {supertable-2.3.6 → supertable-2.3.7}/LICENSE +0 -0
  12. {supertable-2.3.6 → supertable-2.3.7}/README.md +0 -0
  13. {supertable-2.3.6 → supertable-2.3.7}/requirements.txt +0 -0
  14. {supertable-2.3.6 → supertable-2.3.7}/setup.cfg +0 -0
  15. {supertable-2.3.6 → supertable-2.3.7}/supertable/audit/__init__.py +0 -0
  16. {supertable-2.3.6 → supertable-2.3.7}/supertable/audit/admin.py +0 -0
  17. {supertable-2.3.6 → supertable-2.3.7}/supertable/audit/chain.py +0 -0
  18. {supertable-2.3.6 → supertable-2.3.7}/supertable/audit/consumers.py +0 -0
  19. {supertable-2.3.6 → supertable-2.3.7}/supertable/audit/crypto.py +0 -0
  20. {supertable-2.3.6 → supertable-2.3.7}/supertable/audit/events.py +0 -0
  21. {supertable-2.3.6 → supertable-2.3.7}/supertable/audit/export.py +0 -0
  22. {supertable-2.3.6 → supertable-2.3.7}/supertable/audit/logger.py +0 -0
  23. {supertable-2.3.6 → supertable-2.3.7}/supertable/audit/middleware.py +0 -0
  24. {supertable-2.3.6 → supertable-2.3.7}/supertable/audit/reader.py +0 -0
  25. {supertable-2.3.6 → supertable-2.3.7}/supertable/audit/retention.py +0 -0
  26. {supertable-2.3.6 → supertable-2.3.7}/supertable/audit/tests/__init__.py +0 -0
  27. {supertable-2.3.6 → supertable-2.3.7}/supertable/audit/tests/test_chain.py +0 -0
  28. {supertable-2.3.6 → supertable-2.3.7}/supertable/audit/tests/test_crypto.py +0 -0
  29. {supertable-2.3.6 → supertable-2.3.7}/supertable/audit/tests/test_emit.py +0 -0
  30. {supertable-2.3.6 → supertable-2.3.7}/supertable/audit/tests/test_events.py +0 -0
  31. {supertable-2.3.6 → supertable-2.3.7}/supertable/audit/tests/test_retention.py +0 -0
  32. {supertable-2.3.6 → supertable-2.3.7}/supertable/audit/writer_parquet.py +0 -0
  33. {supertable-2.3.6 → supertable-2.3.7}/supertable/audit/writer_redis.py +0 -0
  34. {supertable-2.3.6 → supertable-2.3.7}/supertable/config/__init__.py +0 -0
  35. {supertable-2.3.6 → supertable-2.3.7}/supertable/config/defaults.py +0 -0
  36. {supertable-2.3.6 → supertable-2.3.7}/supertable/config/homedir.py +0 -0
  37. {supertable-2.3.6 → supertable-2.3.7}/supertable/config/tests/__init__.py +0 -0
  38. {supertable-2.3.6 → supertable-2.3.7}/supertable/config/tests/test_defaults.py +0 -0
  39. {supertable-2.3.6 → supertable-2.3.7}/supertable/config/tests/test_homedir.py +0 -0
  40. {supertable-2.3.6 → supertable-2.3.7}/supertable/config/tests/test_settings.py +0 -0
  41. {supertable-2.3.6 → supertable-2.3.7}/supertable/data_classes.py +0 -0
  42. {supertable-2.3.6 → supertable-2.3.7}/supertable/data_reader.py +0 -0
  43. {supertable-2.3.6 → supertable-2.3.7}/supertable/data_writer.py +0 -0
  44. {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/__init__.py +0 -0
  45. {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/__init__.py +0 -0
  46. {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/__main__.py +0 -0
  47. {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/check_filter_builder.py +0 -0
  48. {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/controller.py +0 -0
  49. {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/data_writer_helpers.py +0 -0
  50. {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/defaults.py +0 -0
  51. {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/dummy_data.py +0 -0
  52. {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/read_parquet_header.py +0 -0
  53. {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/s01_01_01_create_super_table.py +0 -0
  54. {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/s01_01_02_enable_mirroring_formats.py +0 -0
  55. {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/s01_02_create_roles.py +0 -0
  56. {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/s01_03_create_users.py +0 -0
  57. {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/s02_01_write_dummy_data.py +0 -0
  58. {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/s02_02_write_single_data.py +0 -0
  59. {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/s02_03_01_write_staging.py +0 -0
  60. {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/s02_03_02_create_pipe.py +0 -0
  61. {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/s02_04_01_write_monitoring_simple.py +0 -0
  62. {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/s02_04_02_write_monitoring_parallel.py +0 -0
  63. {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/s02_05_write_tombstone.py +0 -0
  64. {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/s03_01_read_data_error.py +0 -0
  65. {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/s03_02_01_read_super_data_ok.py +0 -0
  66. {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/s03_02_02_read_table_data_ok.py +0 -0
  67. {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/s03_03_read_meta.py +0 -0
  68. {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/s03_04_read_staging.py +0 -0
  69. {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/s03_06_01_read_roles.py +0 -0
  70. {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/s03_06_02_read_user.py +0 -0
  71. {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/s03_07_01_estimate_read.py +0 -0
  72. {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/s03_07_02_estimate_files.py +0 -0
  73. {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/s03_08_read_snapshot_history.py +0 -0
  74. {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/s04_01_03_delete_pipe.py +0 -0
  75. {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/s05_01_delete_table.py +0 -0
  76. {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/quickstart/s05_02_delete_super_table.py +0 -0
  77. {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/webshop/__init__.py +0 -0
  78. {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/webshop/core.py +0 -0
  79. {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/webshop/defaults.py +0 -0
  80. {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/webshop/generate.py +0 -0
  81. {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/webshop/load.py +0 -0
  82. {supertable-2.3.6 → supertable-2.3.7}/supertable/demo/webshop/topup.py +0 -0
  83. {supertable-2.3.6 → supertable-2.3.7}/supertable/engine/__init__.py +0 -0
  84. {supertable-2.3.6 → supertable-2.3.7}/supertable/engine/data_estimator.py +0 -0
  85. {supertable-2.3.6 → supertable-2.3.7}/supertable/engine/duckdb_lite.py +0 -0
  86. {supertable-2.3.6 → supertable-2.3.7}/supertable/engine/duckdb_pro.py +0 -0
  87. {supertable-2.3.6 → supertable-2.3.7}/supertable/engine/engine_common.py +0 -0
  88. {supertable-2.3.6 → supertable-2.3.7}/supertable/engine/engine_config.py +0 -0
  89. {supertable-2.3.6 → supertable-2.3.7}/supertable/engine/engine_enum.py +0 -0
  90. {supertable-2.3.6 → supertable-2.3.7}/supertable/engine/executor.py +0 -0
  91. {supertable-2.3.6 → supertable-2.3.7}/supertable/engine/plan_stats.py +0 -0
  92. {supertable-2.3.6 → supertable-2.3.7}/supertable/engine/spark_thrift.py +0 -0
  93. {supertable-2.3.6 → supertable-2.3.7}/supertable/engine/tests/__init__.py +0 -0
  94. {supertable-2.3.6 → supertable-2.3.7}/supertable/engine/tests/conftest.py +0 -0
  95. {supertable-2.3.6 → supertable-2.3.7}/supertable/engine/tests/test_engine.py +0 -0
  96. {supertable-2.3.6 → supertable-2.3.7}/supertable/engine/tests/test_engine_config.py +0 -0
  97. {supertable-2.3.6 → supertable-2.3.7}/supertable/engine/tests/test_engine_routing.py +0 -0
  98. {supertable-2.3.6 → supertable-2.3.7}/supertable/engine/tests/test_engine_spill.py +0 -0
  99. {supertable-2.3.6 → supertable-2.3.7}/supertable/errors.py +0 -0
  100. {supertable-2.3.6 → supertable-2.3.7}/supertable/locking/__init__.py +0 -0
  101. {supertable-2.3.6 → supertable-2.3.7}/supertable/locking/benchmarks/__init__.py +0 -0
  102. {supertable-2.3.6 → supertable-2.3.7}/supertable/locking/benchmarks/benchmark_locking.py +0 -0
  103. {supertable-2.3.6 → supertable-2.3.7}/supertable/locking/benchmarks/measure_lock_speed.py +0 -0
  104. {supertable-2.3.6 → supertable-2.3.7}/supertable/locking/benchmarks/measure_lock_time.py +0 -0
  105. {supertable-2.3.6 → supertable-2.3.7}/supertable/locking/file_lock.py +0 -0
  106. {supertable-2.3.6 → supertable-2.3.7}/supertable/locking/redis_lock.py +0 -0
  107. {supertable-2.3.6 → supertable-2.3.7}/supertable/locking/tests/__init__.py +0 -0
  108. {supertable-2.3.6 → supertable-2.3.7}/supertable/locking/tests/test_file_lock.py +0 -0
  109. {supertable-2.3.6 → supertable-2.3.7}/supertable/locking/tests/test_redis_lock.py +0 -0
  110. {supertable-2.3.6 → supertable-2.3.7}/supertable/logging.py +0 -0
  111. {supertable-2.3.6 → supertable-2.3.7}/supertable/meta_reader.py +0 -0
  112. {supertable-2.3.6 → supertable-2.3.7}/supertable/mirroring/__init__.py +0 -0
  113. {supertable-2.3.6 → supertable-2.3.7}/supertable/mirroring/mirror_delta.py +0 -0
  114. {supertable-2.3.6 → supertable-2.3.7}/supertable/mirroring/mirror_formats.py +0 -0
  115. {supertable-2.3.6 → supertable-2.3.7}/supertable/mirroring/mirror_iceberg.py +0 -0
  116. {supertable-2.3.6 → supertable-2.3.7}/supertable/mirroring/mirror_parquet.py +0 -0
  117. {supertable-2.3.6 → supertable-2.3.7}/supertable/monitoring/__init__.py +0 -0
  118. {supertable-2.3.6 → supertable-2.3.7}/supertable/monitoring/partitions.py +0 -0
  119. {supertable-2.3.6 → supertable-2.3.7}/supertable/monitoring_writer.py +0 -0
  120. {supertable-2.3.6 → supertable-2.3.7}/supertable/plan_extender.py +0 -0
  121. {supertable-2.3.6 → supertable-2.3.7}/supertable/query_plan_manager.py +0 -0
  122. {supertable-2.3.6 → supertable-2.3.7}/supertable/rbac/__init__.py +0 -0
  123. {supertable-2.3.6 → supertable-2.3.7}/supertable/rbac/access_control.py +0 -0
  124. {supertable-2.3.6 → supertable-2.3.7}/supertable/rbac/filter_builder.py +0 -0
  125. {supertable-2.3.6 → supertable-2.3.7}/supertable/rbac/permissions.py +0 -0
  126. {supertable-2.3.6 → supertable-2.3.7}/supertable/rbac/role_manager.py +0 -0
  127. {supertable-2.3.6 → supertable-2.3.7}/supertable/rbac/row_column_security.py +0 -0
  128. {supertable-2.3.6 → supertable-2.3.7}/supertable/rbac/tests/test_filter_builder.py +0 -0
  129. {supertable-2.3.6 → supertable-2.3.7}/supertable/rbac/tests/test_rbac.py +0 -0
  130. {supertable-2.3.6 → supertable-2.3.7}/supertable/rbac/tests/test_rbac_per_table.py +0 -0
  131. {supertable-2.3.6 → supertable-2.3.7}/supertable/rbac/user_manager.py +0 -0
  132. {supertable-2.3.6 → supertable-2.3.7}/supertable/redis_catalog.py +0 -0
  133. {supertable-2.3.6 → supertable-2.3.7}/supertable/redis_connector.py +0 -0
  134. {supertable-2.3.6 → supertable-2.3.7}/supertable/redis_infra.py +0 -0
  135. {supertable-2.3.6 → supertable-2.3.7}/supertable/redis_keys.py +0 -0
  136. {supertable-2.3.6 → supertable-2.3.7}/supertable/simple_table.py +0 -0
  137. {supertable-2.3.6 → supertable-2.3.7}/supertable/staging_area.py +0 -0
  138. {supertable-2.3.6 → supertable-2.3.7}/supertable/storage/__init__.py +0 -0
  139. {supertable-2.3.6 → supertable-2.3.7}/supertable/storage/azure_storage.py +0 -0
  140. {supertable-2.3.6 → supertable-2.3.7}/supertable/storage/gcp_storage.py +0 -0
  141. {supertable-2.3.6 → supertable-2.3.7}/supertable/storage/local_storage.py +0 -0
  142. {supertable-2.3.6 → supertable-2.3.7}/supertable/storage/minio_storage.py +0 -0
  143. {supertable-2.3.6 → supertable-2.3.7}/supertable/storage/s3_storage.py +0 -0
  144. {supertable-2.3.6 → supertable-2.3.7}/supertable/storage/storage_factory.py +0 -0
  145. {supertable-2.3.6 → supertable-2.3.7}/supertable/storage/storage_interface.py +0 -0
  146. {supertable-2.3.6 → supertable-2.3.7}/supertable/storage/tests/test_storage.py +0 -0
  147. {supertable-2.3.6 → supertable-2.3.7}/supertable/super_pipe.py +0 -0
  148. {supertable-2.3.6 → supertable-2.3.7}/supertable/super_table.py +0 -0
  149. {supertable-2.3.6 → supertable-2.3.7}/supertable/system_query.py +0 -0
  150. {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/__init__.py +0 -0
  151. {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/test_align_to_schema_fix.py +0 -0
  152. {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/test_create_if_missing.py +0 -0
  153. {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/test_data_reader.py +0 -0
  154. {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/test_data_reader_preflight.py +0 -0
  155. {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/test_data_writer.py +0 -0
  156. {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/test_data_writer_compact.py +0 -0
  157. {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/test_data_writer_comprehensive.py +0 -0
  158. {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/test_errors.py +0 -0
  159. {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/test_meta_reader.py +0 -0
  160. {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/test_monitoring_partitions.py +0 -0
  161. {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/test_monitoring_sink_guard.py +0 -0
  162. {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/test_newer_than.py +0 -0
  163. {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/test_parquet_statistics.py +0 -0
  164. {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/test_processing.py +0 -0
  165. {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/test_processing_compact_resources.py +0 -0
  166. {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/test_processing_stats.py +0 -0
  167. {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/test_query_sql.py +0 -0
  168. {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/test_read_pruning_differential.py +0 -0
  169. {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/test_read_pruning_integration.py +0 -0
  170. {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/test_redis_key_prefix.py +0 -0
  171. {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/test_simple_table.py +0 -0
  172. {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/test_stats_cache.py +0 -0
  173. {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/test_stats_pruning.py +0 -0
  174. {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/test_stats_schema_snapshot.py +0 -0
  175. {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/test_super_table.py +0 -0
  176. {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/test_supertable_all.py +0 -0
  177. {supertable-2.3.6 → supertable-2.3.7}/supertable/tests/test_system_query.py +0 -0
  178. {supertable-2.3.6 → supertable-2.3.7}/supertable/utils/__init__.py +0 -0
  179. {supertable-2.3.6 → supertable-2.3.7}/supertable/utils/helper.py +0 -0
  180. {supertable-2.3.6 → supertable-2.3.7}/supertable/utils/profiler.py +0 -0
  181. {supertable-2.3.6 → supertable-2.3.7}/supertable/utils/sql_parser.py +0 -0
  182. {supertable-2.3.6 → supertable-2.3.7}/supertable/utils/tests/test_sql_parser_columns.py +0 -0
  183. {supertable-2.3.6 → supertable-2.3.7}/supertable/utils/timer.py +0 -0
  184. {supertable-2.3.6 → supertable-2.3.7}/supertable.egg-info/dependency_links.txt +0 -0
  185. {supertable-2.3.6 → supertable-2.3.7}/supertable.egg-info/entry_points.txt +0 -0
  186. {supertable-2.3.6 → supertable-2.3.7}/supertable.egg-info/requires.txt +0 -0
  187. {supertable-2.3.6 → supertable-2.3.7}/supertable.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: supertable
3
- Version: 2.3.6
3
+ Version: 2.3.7
4
4
  Summary: SuperTable — versioned data lake library for SQL analytics on Parquet + Redis.
5
5
  Author: Levente Kupas
6
6
  Author-email: Levente Kupas <lkupas@kladnasoft.com>
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "supertable"
7
- version = "2.3.6"
7
+ version = "2.3.7"
8
8
  description = "SuperTable — versioned data lake library for SQL analytics on Parquet + Redis."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -19,7 +19,7 @@ long_description = readme.read_text(encoding="utf-8") if readme.exists() else ""
19
19
 
20
20
  setup(
21
21
  name="supertable",
22
- version="2.3.6",
22
+ version="2.3.7",
23
23
  description="SuperTable — versioned data lake library for SQL analytics on Parquet + Redis.",
24
24
  long_description=long_description,
25
25
  long_description_content_type="text/markdown",
@@ -25,7 +25,7 @@ See the ``supertable.demo`` package for runnable end-to-end demos and the
25
25
  project documentation for the full API surface.
26
26
  """
27
27
 
28
- __version__ = "2.3.6"
28
+ __version__ = "2.3.7"
29
29
 
30
30
  # Re-export the core public surface so users can do ``from supertable import …``
31
31
  # instead of remembering submodule paths.
@@ -157,6 +157,13 @@ class Settings:
157
157
  SUPERTABLE_DUCKDB_MATERIALIZE: str = "view" # SUPERTABLE_DUCKDB_MATERIALIZE
158
158
  SUPERTABLE_DUCKDB_PRESIGNED: bool = False # SUPERTABLE_DUCKDB_PRESIGNED
159
159
  SUPERTABLE_DUCKDB_USE_HTTPFS: bool = False # SUPERTABLE_DUCKDB_USE_HTTPFS
160
+ # Write-path overwrite/delete resolution via the DuckDB pushdown probe.
161
+ # Disabled by default: the polars fallback reads only the projected key
162
+ # columns through the storage SDK and needs no httpfs extension, so it works
163
+ # in environments without one (or without internet to install it). Enable
164
+ # only where httpfs is available and the probe's row-group skipping is worth
165
+ # it (e.g. very wide tables / many overlapping files).
166
+ SUPERTABLE_DUCKDB_WRITE_PROBE: bool = False # SUPERTABLE_DUCKDB_WRITE_PROBE
160
167
  # Deletion-vector (tombstone) table cache. Each entry is a small
161
168
  # `DISTINCT __rowid__` table keyed by the stable tombstone path; the
162
169
  # tombstone view ANTI JOINs it instead of re-reading the parquet every
@@ -437,6 +444,7 @@ def _build_settings() -> Settings:
437
444
  SUPERTABLE_DUCKDB_MATERIALIZE=_env_str("SUPERTABLE_DUCKDB_MATERIALIZE", "view"),
438
445
  SUPERTABLE_DUCKDB_PRESIGNED=_env_bool("SUPERTABLE_DUCKDB_PRESIGNED", False),
439
446
  SUPERTABLE_DUCKDB_USE_HTTPFS=_env_bool("SUPERTABLE_DUCKDB_USE_HTTPFS", False),
447
+ SUPERTABLE_DUCKDB_WRITE_PROBE=_env_bool("SUPERTABLE_DUCKDB_WRITE_PROBE", False),
440
448
  SUPERTABLE_DUCKDB_TOMBSTONE_CACHE_MAX_PER_TABLE=_env_int("SUPERTABLE_DUCKDB_TOMBSTONE_CACHE_MAX_PER_TABLE", 8),
441
449
  SUPERTABLE_DUCKDB_TOMBSTONE_CACHE_TTL_SEC=_env_int("SUPERTABLE_DUCKDB_TOMBSTONE_CACHE_TTL_SEC", 300),
442
450
  SUPERTABLE_DEBUG_TIMINGS=_env_bool("SUPERTABLE_DEBUG_TIMINGS", False),
@@ -1316,11 +1316,13 @@ def resolve_overwrite_writes(
1316
1316
  ) -> Tuple[polars.DataFrame, List[Tuple[str, int]]]:
1317
1317
  """Single-pass overwrite resolution: stale filtering + delete-vector pairs.
1318
1318
 
1319
- Returns ``(filtered_incoming_df, delete_pairs)`` computed from ONE DuckDB
1320
- pushdown probe over the overlapping files. Falls back to the original polars
1319
+ Returns ``(filtered_incoming_df, delete_pairs)``. When the DuckDB pushdown
1320
+ probe is enabled (``SUPERTABLE_DUCKDB_WRITE_PROBE``) it is computed from ONE
1321
+ probe over the overlapping files. Falls back to the original polars
1321
1322
  full-read path (``filter_stale_incoming_rows`` + ``identify_deleted_rowids``)
1322
- when DuckDB is unavailable, the probe fails, or the file schema can't be
1323
- probed — semantics are identical on both paths.
1323
+ when the probe is disabled (the default), DuckDB is unavailable, the probe
1324
+ fails, or the file schema can't be probed — semantics are identical on both
1325
+ paths.
1324
1326
 
1325
1327
  *newer_than_col* falsy ⇒ no stale filtering (delete/upsert without conflict
1326
1328
  resolution); the incoming df is returned unchanged and every overlapping row
@@ -1343,9 +1345,15 @@ def resolve_overwrite_writes(
1343
1345
  f"{incoming_keys.height} unique incoming key(s) on {overwrite_columns}, "
1344
1346
  f"newer_than={newer_than_col}"
1345
1347
  )
1346
- matched = _duckdb_probe_overlap_matches(
1347
- overlap_true, overwrite_columns, newer_than_col, incoming_keys, profiler=p,
1348
- )
1348
+ # The DuckDB pushdown probe is opt-in (SUPERTABLE_DUCKDB_WRITE_PROBE). When
1349
+ # disabled (the default), skip it entirely and use the polars fallback below,
1350
+ # which reads only the projected key columns via the storage SDK and needs no
1351
+ # httpfs extension — the safe path for environments without one.
1352
+ matched = None
1353
+ if settings.SUPERTABLE_DUCKDB_WRITE_PROBE:
1354
+ matched = _duckdb_probe_overlap_matches(
1355
+ overlap_true, overwrite_columns, newer_than_col, incoming_keys, profiler=p,
1356
+ )
1349
1357
  if matched is not None:
1350
1358
  try:
1351
1359
  return _derive_stale_and_deletes(
@@ -20,21 +20,37 @@ local files regardless of the ambient STORAGE_TYPE.
20
20
  """
21
21
  from __future__ import annotations
22
22
 
23
+ import dataclasses
23
24
  from unittest.mock import patch
24
25
 
25
26
  import polars as pl
26
27
  import pyarrow.parquet as pq
27
28
  import pytest
28
29
 
30
+ import supertable.processing as st_processing
29
31
  from supertable.processing import (
30
32
  resolve_overwrite_writes,
31
33
  filter_stale_incoming_rows,
32
34
  identify_deleted_rowids,
33
35
  )
36
+ from supertable.config.settings import settings
34
37
  from supertable.storage.local_storage import LocalStorage
35
38
  from supertable.utils.profiler import Profiler
36
39
 
37
40
 
41
+ @pytest.fixture(autouse=True)
42
+ def _enable_write_probe(monkeypatch):
43
+ """These tests validate the DuckDB pushdown probe path, which is opt-in
44
+ (``SUPERTABLE_DUCKDB_WRITE_PROBE``, default off). Force it on so the probe
45
+ is actually exercised; without this the gate in ``resolve_overwrite_writes``
46
+ would route every call to the polars fallback and the probe assertions
47
+ (``probe_files`` present, no ``overwrite_resolve_fallback``) would be vacuous."""
48
+ monkeypatch.setattr(
49
+ st_processing, "settings",
50
+ dataclasses.replace(settings, SUPERTABLE_DUCKDB_WRITE_PROBE=True),
51
+ )
52
+
53
+
38
54
  # ---------------------------------------------------------------------------
39
55
  # Helpers
40
56
  # ---------------------------------------------------------------------------
@@ -0,0 +1,130 @@
1
+ # supertable/tests/test_write_probe_gate.py
2
+ """Gate test for ``SUPERTABLE_DUCKDB_WRITE_PROBE``.
3
+
4
+ The DuckDB pushdown probe in the overwrite/delete write path is opt-in and
5
+ disabled by default. Environments without the httpfs extension (or without
6
+ internet to install it) must NOT stall on a DuckDB httpfs install; they use the
7
+ polars fallback, which reads only the projected key columns through the storage
8
+ SDK. These tests pin the gate's contract:
9
+
10
+ * flag OFF (default) -> the probe is never called; resolution goes through the
11
+ polars fallback (profiler 'overwrite_resolve_fallback' set, no 'probe_files').
12
+ * flag ON -> the probe IS called ('probe_files' set).
13
+ * both produce identical (filtered rows, delete pairs) -- the gate changes
14
+ only the mechanism, never the result (the fallback is the semantic oracle).
15
+ """
16
+ from __future__ import annotations
17
+
18
+ import dataclasses
19
+ from unittest.mock import patch
20
+
21
+ import polars as pl
22
+ import pyarrow.parquet as pq
23
+ import pytest
24
+
25
+ import supertable.processing as st_processing
26
+ from supertable.config.settings import settings
27
+ from supertable.processing import resolve_overwrite_writes
28
+ from supertable.storage.local_storage import LocalStorage
29
+ from supertable.utils.profiler import Profiler
30
+
31
+
32
+ @pytest.fixture(autouse=True)
33
+ def _local_storage():
34
+ """Force both probe and fallback reads through a real LocalStorage so the
35
+ on-disk tmp parquet files are read identically regardless of STORAGE_TYPE."""
36
+ with patch("supertable.processing._get_storage", return_value=LocalStorage()):
37
+ yield
38
+
39
+
40
+ def _write(d, name, df):
41
+ path = str(d / name)
42
+ pq.write_table(df.to_arrow(), path)
43
+ return (path, True, (d / name).stat().st_size)
44
+
45
+
46
+ def _set_probe(monkeypatch, enabled: bool):
47
+ monkeypatch.setattr(
48
+ st_processing, "settings",
49
+ dataclasses.replace(settings, SUPERTABLE_DUCKDB_WRITE_PROBE=enabled),
50
+ )
51
+
52
+
53
+ def _spy_probe(monkeypatch):
54
+ """Wrap the real probe to count calls without altering its behavior."""
55
+ calls = {"n": 0}
56
+ real = st_processing._duckdb_probe_overlap_matches
57
+
58
+ def _counting(*a, **k):
59
+ calls["n"] += 1
60
+ return real(*a, **k)
61
+
62
+ monkeypatch.setattr(st_processing, "_duckdb_probe_overlap_matches", _counting)
63
+ return calls
64
+
65
+
66
+ def _resolve(incoming, files, keys, ntc, prof):
67
+ return resolve_overwrite_writes(
68
+ incoming_df=incoming, overlapping_files=files,
69
+ overwrite_columns=keys, newer_than_col=ntc, profiler=prof,
70
+ )
71
+
72
+
73
+ def test_probe_disabled_by_default_uses_fallback(tmp_path, monkeypatch):
74
+ f = _write(tmp_path, "a.parquet", pl.DataFrame(
75
+ {"__rowid__": [1], "user_id": [5], "name": ["Alice"], "updated_at": [7]}))
76
+ incoming = pl.DataFrame({"user_id": [5], "name": ["Bob"], "updated_at": [9]})
77
+
78
+ _set_probe(monkeypatch, False)
79
+ calls = _spy_probe(monkeypatch)
80
+ prof = Profiler()
81
+ filt, pairs = _resolve(incoming, {f}, ["user_id"], "updated_at", prof)
82
+
83
+ counts = prof.emit_counts()
84
+ assert calls["n"] == 0, "probe must NOT be called when the flag is off"
85
+ assert "overwrite_resolve_fallback" in counts, f"fallback not taken; counts={counts}"
86
+ assert "probe_files" not in counts, f"probe ran despite flag off; counts={counts}"
87
+ # Correct result via the fallback: the newer incoming row survives and
88
+ # tombstones the existing row's __rowid__.
89
+ assert filt.height == 1
90
+ assert pairs == [(f[0], 1)]
91
+
92
+
93
+ def test_probe_enabled_calls_probe(tmp_path, monkeypatch):
94
+ f = _write(tmp_path, "a.parquet", pl.DataFrame(
95
+ {"__rowid__": [1], "user_id": [5], "name": ["Alice"], "updated_at": [7]}))
96
+ incoming = pl.DataFrame({"user_id": [5], "name": ["Bob"], "updated_at": [9]})
97
+
98
+ _set_probe(monkeypatch, True)
99
+ calls = _spy_probe(monkeypatch)
100
+ prof = Profiler()
101
+ filt, pairs = _resolve(incoming, {f}, ["user_id"], "updated_at", prof)
102
+
103
+ counts = prof.emit_counts()
104
+ assert calls["n"] == 1, "probe must be called when the flag is on"
105
+ assert "probe_files" in counts, f"probe did not run; counts={counts}"
106
+ assert filt.height == 1
107
+ assert pairs == [(f[0], 1)]
108
+
109
+
110
+ def test_gate_result_identical_on_and_off(tmp_path, monkeypatch):
111
+ """The flag changes only the mechanism: filtered rows + delete pairs match.
112
+
113
+ user_id=5 is newer (9 > 7) -> survives + tombstones rowid 1; user_id=6 is
114
+ stale (3 < 7) -> dropped, no tombstone. Identical on both code paths.
115
+ """
116
+ f = _write(tmp_path, "d.parquet", pl.DataFrame(
117
+ {"__rowid__": [1, 2], "user_id": [5, 6], "name": ["A", "B"],
118
+ "updated_at": [7, 7]}))
119
+ incoming = pl.DataFrame(
120
+ {"user_id": [5, 6], "name": ["X", "Y"], "updated_at": [9, 3]})
121
+
122
+ def _run(enabled):
123
+ _set_probe(monkeypatch, enabled)
124
+ filt, pairs = _resolve(incoming, {f}, ["user_id"], "updated_at", Profiler())
125
+ rows = sorted(
126
+ filt.select(["user_id", "name", "updated_at"]).to_dicts(), key=repr
127
+ )
128
+ return rows, sorted(pairs)
129
+
130
+ assert _run(True) == _run(False)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: supertable
3
- Version: 2.3.6
3
+ Version: 2.3.7
4
4
  Summary: SuperTable — versioned data lake library for SQL analytics on Parquet + Redis.
5
5
  Author: Levente Kupas
6
6
  Author-email: Levente Kupas <lkupas@kladnasoft.com>
@@ -176,6 +176,7 @@ supertable/tests/test_stats_schema_snapshot.py
176
176
  supertable/tests/test_super_table.py
177
177
  supertable/tests/test_supertable_all.py
178
178
  supertable/tests/test_system_query.py
179
+ supertable/tests/test_write_probe_gate.py
179
180
  supertable/utils/__init__.py
180
181
  supertable/utils/helper.py
181
182
  supertable/utils/profiler.py
File without changes
File without changes
File without changes
File without changes