supertable 2.3.4__tar.gz → 2.3.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (187)
  1. {supertable-2.3.4/supertable.egg-info → supertable-2.3.6}/PKG-INFO +1 -1
  2. {supertable-2.3.4 → supertable-2.3.6}/pyproject.toml +1 -1
  3. {supertable-2.3.4 → supertable-2.3.6}/setup.py +1 -1
  4. {supertable-2.3.4 → supertable-2.3.6}/supertable/__init__.py +1 -1
  5. {supertable-2.3.4 → supertable-2.3.6}/supertable/config/defaults.py +6 -0
  6. supertable-2.3.6/supertable/config/homedir.py +96 -0
  7. {supertable-2.3.4 → supertable-2.3.6}/supertable/data_writer.py +169 -25
  8. {supertable-2.3.4 → supertable-2.3.6}/supertable/demo/quickstart/defaults.py +3 -1
  9. {supertable-2.3.4 → supertable-2.3.6}/supertable/engine/engine_common.py +116 -61
  10. {supertable-2.3.4 → supertable-2.3.6}/supertable/engine/tests/conftest.py +15 -0
  11. {supertable-2.3.4 → supertable-2.3.6}/supertable/engine/tests/test_engine.py +32 -0
  12. {supertable-2.3.4 → supertable-2.3.6}/supertable/processing.py +143 -35
  13. {supertable-2.3.4 → supertable-2.3.6}/supertable/simple_table.py +0 -9
  14. {supertable-2.3.4 → supertable-2.3.6}/supertable/storage/azure_storage.py +7 -2
  15. {supertable-2.3.4 → supertable-2.3.6}/supertable/storage/gcp_storage.py +7 -2
  16. {supertable-2.3.4 → supertable-2.3.6}/supertable/storage/local_storage.py +15 -4
  17. {supertable-2.3.4 → supertable-2.3.6}/supertable/storage/minio_storage.py +7 -2
  18. {supertable-2.3.4 → supertable-2.3.6}/supertable/storage/s3_storage.py +7 -2
  19. {supertable-2.3.4 → supertable-2.3.6}/supertable/storage/storage_interface.py +21 -2
  20. {supertable-2.3.4 → supertable-2.3.6}/supertable/super_table.py +0 -6
  21. {supertable-2.3.4 → supertable-2.3.6}/supertable/tests/test_data_writer_compact.py +36 -5
  22. {supertable-2.3.4 → supertable-2.3.6}/supertable/tests/test_data_writer_comprehensive.py +2 -1
  23. {supertable-2.3.4 → supertable-2.3.6}/supertable/tests/test_processing_stats.py +63 -1
  24. {supertable-2.3.4 → supertable-2.3.6}/supertable/tests/test_resolve_overwrite_writes.py +6 -5
  25. {supertable-2.3.4 → supertable-2.3.6/supertable.egg-info}/PKG-INFO +1 -1
  26. supertable-2.3.4/supertable/config/homedir.py +0 -62
  27. {supertable-2.3.4 → supertable-2.3.6}/LICENSE +0 -0
  28. {supertable-2.3.4 → supertable-2.3.6}/README.md +0 -0
  29. {supertable-2.3.4 → supertable-2.3.6}/requirements.txt +0 -0
  30. {supertable-2.3.4 → supertable-2.3.6}/setup.cfg +0 -0
  31. {supertable-2.3.4 → supertable-2.3.6}/supertable/audit/__init__.py +0 -0
  32. {supertable-2.3.4 → supertable-2.3.6}/supertable/audit/admin.py +0 -0
  33. {supertable-2.3.4 → supertable-2.3.6}/supertable/audit/chain.py +0 -0
  34. {supertable-2.3.4 → supertable-2.3.6}/supertable/audit/consumers.py +0 -0
  35. {supertable-2.3.4 → supertable-2.3.6}/supertable/audit/crypto.py +0 -0
  36. {supertable-2.3.4 → supertable-2.3.6}/supertable/audit/events.py +0 -0
  37. {supertable-2.3.4 → supertable-2.3.6}/supertable/audit/export.py +0 -0
  38. {supertable-2.3.4 → supertable-2.3.6}/supertable/audit/logger.py +0 -0
  39. {supertable-2.3.4 → supertable-2.3.6}/supertable/audit/middleware.py +0 -0
  40. {supertable-2.3.4 → supertable-2.3.6}/supertable/audit/reader.py +0 -0
  41. {supertable-2.3.4 → supertable-2.3.6}/supertable/audit/retention.py +0 -0
  42. {supertable-2.3.4 → supertable-2.3.6}/supertable/audit/tests/__init__.py +0 -0
  43. {supertable-2.3.4 → supertable-2.3.6}/supertable/audit/tests/test_chain.py +0 -0
  44. {supertable-2.3.4 → supertable-2.3.6}/supertable/audit/tests/test_crypto.py +0 -0
  45. {supertable-2.3.4 → supertable-2.3.6}/supertable/audit/tests/test_emit.py +0 -0
  46. {supertable-2.3.4 → supertable-2.3.6}/supertable/audit/tests/test_events.py +0 -0
  47. {supertable-2.3.4 → supertable-2.3.6}/supertable/audit/tests/test_retention.py +0 -0
  48. {supertable-2.3.4 → supertable-2.3.6}/supertable/audit/writer_parquet.py +0 -0
  49. {supertable-2.3.4 → supertable-2.3.6}/supertable/audit/writer_redis.py +0 -0
  50. {supertable-2.3.4 → supertable-2.3.6}/supertable/config/__init__.py +0 -0
  51. {supertable-2.3.4 → supertable-2.3.6}/supertable/config/settings.py +0 -0
  52. {supertable-2.3.4 → supertable-2.3.6}/supertable/config/tests/__init__.py +0 -0
  53. {supertable-2.3.4 → supertable-2.3.6}/supertable/config/tests/test_defaults.py +0 -0
  54. {supertable-2.3.4 → supertable-2.3.6}/supertable/config/tests/test_homedir.py +0 -0
  55. {supertable-2.3.4 → supertable-2.3.6}/supertable/config/tests/test_settings.py +0 -0
  56. {supertable-2.3.4 → supertable-2.3.6}/supertable/data_classes.py +0 -0
  57. {supertable-2.3.4 → supertable-2.3.6}/supertable/data_reader.py +0 -0
  58. {supertable-2.3.4 → supertable-2.3.6}/supertable/demo/__init__.py +0 -0
  59. {supertable-2.3.4 → supertable-2.3.6}/supertable/demo/quickstart/__init__.py +0 -0
  60. {supertable-2.3.4 → supertable-2.3.6}/supertable/demo/quickstart/__main__.py +0 -0
  61. {supertable-2.3.4 → supertable-2.3.6}/supertable/demo/quickstart/check_filter_builder.py +0 -0
  62. {supertable-2.3.4 → supertable-2.3.6}/supertable/demo/quickstart/controller.py +0 -0
  63. {supertable-2.3.4 → supertable-2.3.6}/supertable/demo/quickstart/data_writer_helpers.py +0 -0
  64. {supertable-2.3.4 → supertable-2.3.6}/supertable/demo/quickstart/dummy_data.py +0 -0
  65. {supertable-2.3.4 → supertable-2.3.6}/supertable/demo/quickstart/read_parquet_header.py +0 -0
  66. {supertable-2.3.4 → supertable-2.3.6}/supertable/demo/quickstart/s01_01_01_create_super_table.py +0 -0
  67. {supertable-2.3.4 → supertable-2.3.6}/supertable/demo/quickstart/s01_01_02_enable_mirroring_formats.py +0 -0
  68. {supertable-2.3.4 → supertable-2.3.6}/supertable/demo/quickstart/s01_02_create_roles.py +0 -0
  69. {supertable-2.3.4 → supertable-2.3.6}/supertable/demo/quickstart/s01_03_create_users.py +0 -0
  70. {supertable-2.3.4 → supertable-2.3.6}/supertable/demo/quickstart/s02_01_write_dummy_data.py +0 -0
  71. {supertable-2.3.4 → supertable-2.3.6}/supertable/demo/quickstart/s02_02_write_single_data.py +0 -0
  72. {supertable-2.3.4 → supertable-2.3.6}/supertable/demo/quickstart/s02_03_01_write_staging.py +0 -0
  73. {supertable-2.3.4 → supertable-2.3.6}/supertable/demo/quickstart/s02_03_02_create_pipe.py +0 -0
  74. {supertable-2.3.4 → supertable-2.3.6}/supertable/demo/quickstart/s02_04_01_write_monitoring_simple.py +0 -0
  75. {supertable-2.3.4 → supertable-2.3.6}/supertable/demo/quickstart/s02_04_02_write_monitoring_parallel.py +0 -0
  76. {supertable-2.3.4 → supertable-2.3.6}/supertable/demo/quickstart/s02_05_write_tombstone.py +0 -0
  77. {supertable-2.3.4 → supertable-2.3.6}/supertable/demo/quickstart/s03_01_read_data_error.py +0 -0
  78. {supertable-2.3.4 → supertable-2.3.6}/supertable/demo/quickstart/s03_02_01_read_super_data_ok.py +0 -0
  79. {supertable-2.3.4 → supertable-2.3.6}/supertable/demo/quickstart/s03_02_02_read_table_data_ok.py +0 -0
  80. {supertable-2.3.4 → supertable-2.3.6}/supertable/demo/quickstart/s03_03_read_meta.py +0 -0
  81. {supertable-2.3.4 → supertable-2.3.6}/supertable/demo/quickstart/s03_04_read_staging.py +0 -0
  82. {supertable-2.3.4 → supertable-2.3.6}/supertable/demo/quickstart/s03_06_01_read_roles.py +0 -0
  83. {supertable-2.3.4 → supertable-2.3.6}/supertable/demo/quickstart/s03_06_02_read_user.py +0 -0
  84. {supertable-2.3.4 → supertable-2.3.6}/supertable/demo/quickstart/s03_07_01_estimate_read.py +0 -0
  85. {supertable-2.3.4 → supertable-2.3.6}/supertable/demo/quickstart/s03_07_02_estimate_files.py +0 -0
  86. {supertable-2.3.4 → supertable-2.3.6}/supertable/demo/quickstart/s03_08_read_snapshot_history.py +0 -0
  87. {supertable-2.3.4 → supertable-2.3.6}/supertable/demo/quickstart/s04_01_03_delete_pipe.py +0 -0
  88. {supertable-2.3.4 → supertable-2.3.6}/supertable/demo/quickstart/s05_01_delete_table.py +0 -0
  89. {supertable-2.3.4 → supertable-2.3.6}/supertable/demo/quickstart/s05_02_delete_super_table.py +0 -0
  90. {supertable-2.3.4 → supertable-2.3.6}/supertable/demo/webshop/__init__.py +0 -0
  91. {supertable-2.3.4 → supertable-2.3.6}/supertable/demo/webshop/core.py +0 -0
  92. {supertable-2.3.4 → supertable-2.3.6}/supertable/demo/webshop/defaults.py +0 -0
  93. {supertable-2.3.4 → supertable-2.3.6}/supertable/demo/webshop/generate.py +0 -0
  94. {supertable-2.3.4 → supertable-2.3.6}/supertable/demo/webshop/load.py +0 -0
  95. {supertable-2.3.4 → supertable-2.3.6}/supertable/demo/webshop/topup.py +0 -0
  96. {supertable-2.3.4 → supertable-2.3.6}/supertable/engine/__init__.py +0 -0
  97. {supertable-2.3.4 → supertable-2.3.6}/supertable/engine/data_estimator.py +0 -0
  98. {supertable-2.3.4 → supertable-2.3.6}/supertable/engine/duckdb_lite.py +0 -0
  99. {supertable-2.3.4 → supertable-2.3.6}/supertable/engine/duckdb_pro.py +0 -0
  100. {supertable-2.3.4 → supertable-2.3.6}/supertable/engine/engine_config.py +0 -0
  101. {supertable-2.3.4 → supertable-2.3.6}/supertable/engine/engine_enum.py +0 -0
  102. {supertable-2.3.4 → supertable-2.3.6}/supertable/engine/executor.py +0 -0
  103. {supertable-2.3.4 → supertable-2.3.6}/supertable/engine/plan_stats.py +0 -0
  104. {supertable-2.3.4 → supertable-2.3.6}/supertable/engine/spark_thrift.py +0 -0
  105. {supertable-2.3.4 → supertable-2.3.6}/supertable/engine/tests/__init__.py +0 -0
  106. {supertable-2.3.4 → supertable-2.3.6}/supertable/engine/tests/test_engine_config.py +0 -0
  107. {supertable-2.3.4 → supertable-2.3.6}/supertable/engine/tests/test_engine_routing.py +0 -0
  108. {supertable-2.3.4 → supertable-2.3.6}/supertable/engine/tests/test_engine_spill.py +0 -0
  109. {supertable-2.3.4 → supertable-2.3.6}/supertable/errors.py +0 -0
  110. {supertable-2.3.4 → supertable-2.3.6}/supertable/locking/__init__.py +0 -0
  111. {supertable-2.3.4 → supertable-2.3.6}/supertable/locking/benchmarks/__init__.py +0 -0
  112. {supertable-2.3.4 → supertable-2.3.6}/supertable/locking/benchmarks/benchmark_locking.py +0 -0
  113. {supertable-2.3.4 → supertable-2.3.6}/supertable/locking/benchmarks/measure_lock_speed.py +0 -0
  114. {supertable-2.3.4 → supertable-2.3.6}/supertable/locking/benchmarks/measure_lock_time.py +0 -0
  115. {supertable-2.3.4 → supertable-2.3.6}/supertable/locking/file_lock.py +0 -0
  116. {supertable-2.3.4 → supertable-2.3.6}/supertable/locking/redis_lock.py +0 -0
  117. {supertable-2.3.4 → supertable-2.3.6}/supertable/locking/tests/__init__.py +0 -0
  118. {supertable-2.3.4 → supertable-2.3.6}/supertable/locking/tests/test_file_lock.py +0 -0
  119. {supertable-2.3.4 → supertable-2.3.6}/supertable/locking/tests/test_redis_lock.py +0 -0
  120. {supertable-2.3.4 → supertable-2.3.6}/supertable/logging.py +0 -0
  121. {supertable-2.3.4 → supertable-2.3.6}/supertable/meta_reader.py +0 -0
  122. {supertable-2.3.4 → supertable-2.3.6}/supertable/mirroring/__init__.py +0 -0
  123. {supertable-2.3.4 → supertable-2.3.6}/supertable/mirroring/mirror_delta.py +0 -0
  124. {supertable-2.3.4 → supertable-2.3.6}/supertable/mirroring/mirror_formats.py +0 -0
  125. {supertable-2.3.4 → supertable-2.3.6}/supertable/mirroring/mirror_iceberg.py +0 -0
  126. {supertable-2.3.4 → supertable-2.3.6}/supertable/mirroring/mirror_parquet.py +0 -0
  127. {supertable-2.3.4 → supertable-2.3.6}/supertable/monitoring/__init__.py +0 -0
  128. {supertable-2.3.4 → supertable-2.3.6}/supertable/monitoring/partitions.py +0 -0
  129. {supertable-2.3.4 → supertable-2.3.6}/supertable/monitoring_writer.py +0 -0
  130. {supertable-2.3.4 → supertable-2.3.6}/supertable/plan_extender.py +0 -0
  131. {supertable-2.3.4 → supertable-2.3.6}/supertable/query_plan_manager.py +0 -0
  132. {supertable-2.3.4 → supertable-2.3.6}/supertable/rbac/__init__.py +0 -0
  133. {supertable-2.3.4 → supertable-2.3.6}/supertable/rbac/access_control.py +0 -0
  134. {supertable-2.3.4 → supertable-2.3.6}/supertable/rbac/filter_builder.py +0 -0
  135. {supertable-2.3.4 → supertable-2.3.6}/supertable/rbac/permissions.py +0 -0
  136. {supertable-2.3.4 → supertable-2.3.6}/supertable/rbac/role_manager.py +0 -0
  137. {supertable-2.3.4 → supertable-2.3.6}/supertable/rbac/row_column_security.py +0 -0
  138. {supertable-2.3.4 → supertable-2.3.6}/supertable/rbac/tests/test_filter_builder.py +0 -0
  139. {supertable-2.3.4 → supertable-2.3.6}/supertable/rbac/tests/test_rbac.py +0 -0
  140. {supertable-2.3.4 → supertable-2.3.6}/supertable/rbac/tests/test_rbac_per_table.py +0 -0
  141. {supertable-2.3.4 → supertable-2.3.6}/supertable/rbac/user_manager.py +0 -0
  142. {supertable-2.3.4 → supertable-2.3.6}/supertable/redis_catalog.py +0 -0
  143. {supertable-2.3.4 → supertable-2.3.6}/supertable/redis_connector.py +0 -0
  144. {supertable-2.3.4 → supertable-2.3.6}/supertable/redis_infra.py +0 -0
  145. {supertable-2.3.4 → supertable-2.3.6}/supertable/redis_keys.py +0 -0
  146. {supertable-2.3.4 → supertable-2.3.6}/supertable/staging_area.py +0 -0
  147. {supertable-2.3.4 → supertable-2.3.6}/supertable/storage/__init__.py +0 -0
  148. {supertable-2.3.4 → supertable-2.3.6}/supertable/storage/storage_factory.py +0 -0
  149. {supertable-2.3.4 → supertable-2.3.6}/supertable/storage/tests/test_storage.py +0 -0
  150. {supertable-2.3.4 → supertable-2.3.6}/supertable/super_pipe.py +0 -0
  151. {supertable-2.3.4 → supertable-2.3.6}/supertable/system_query.py +0 -0
  152. {supertable-2.3.4 → supertable-2.3.6}/supertable/tests/__init__.py +0 -0
  153. {supertable-2.3.4 → supertable-2.3.6}/supertable/tests/test_align_to_schema_fix.py +0 -0
  154. {supertable-2.3.4 → supertable-2.3.6}/supertable/tests/test_create_if_missing.py +0 -0
  155. {supertable-2.3.4 → supertable-2.3.6}/supertable/tests/test_data_reader.py +0 -0
  156. {supertable-2.3.4 → supertable-2.3.6}/supertable/tests/test_data_reader_preflight.py +0 -0
  157. {supertable-2.3.4 → supertable-2.3.6}/supertable/tests/test_data_writer.py +0 -0
  158. {supertable-2.3.4 → supertable-2.3.6}/supertable/tests/test_errors.py +0 -0
  159. {supertable-2.3.4 → supertable-2.3.6}/supertable/tests/test_meta_reader.py +0 -0
  160. {supertable-2.3.4 → supertable-2.3.6}/supertable/tests/test_monitoring_partitions.py +0 -0
  161. {supertable-2.3.4 → supertable-2.3.6}/supertable/tests/test_monitoring_sink_guard.py +0 -0
  162. {supertable-2.3.4 → supertable-2.3.6}/supertable/tests/test_newer_than.py +0 -0
  163. {supertable-2.3.4 → supertable-2.3.6}/supertable/tests/test_parquet_statistics.py +0 -0
  164. {supertable-2.3.4 → supertable-2.3.6}/supertable/tests/test_processing.py +0 -0
  165. {supertable-2.3.4 → supertable-2.3.6}/supertable/tests/test_processing_compact_resources.py +0 -0
  166. {supertable-2.3.4 → supertable-2.3.6}/supertable/tests/test_query_sql.py +0 -0
  167. {supertable-2.3.4 → supertable-2.3.6}/supertable/tests/test_read_pruning_differential.py +0 -0
  168. {supertable-2.3.4 → supertable-2.3.6}/supertable/tests/test_read_pruning_integration.py +0 -0
  169. {supertable-2.3.4 → supertable-2.3.6}/supertable/tests/test_redis_key_prefix.py +0 -0
  170. {supertable-2.3.4 → supertable-2.3.6}/supertable/tests/test_simple_table.py +0 -0
  171. {supertable-2.3.4 → supertable-2.3.6}/supertable/tests/test_stats_cache.py +0 -0
  172. {supertable-2.3.4 → supertable-2.3.6}/supertable/tests/test_stats_pruning.py +0 -0
  173. {supertable-2.3.4 → supertable-2.3.6}/supertable/tests/test_stats_schema_snapshot.py +0 -0
  174. {supertable-2.3.4 → supertable-2.3.6}/supertable/tests/test_super_table.py +0 -0
  175. {supertable-2.3.4 → supertable-2.3.6}/supertable/tests/test_supertable_all.py +0 -0
  176. {supertable-2.3.4 → supertable-2.3.6}/supertable/tests/test_system_query.py +0 -0
  177. {supertable-2.3.4 → supertable-2.3.6}/supertable/utils/__init__.py +0 -0
  178. {supertable-2.3.4 → supertable-2.3.6}/supertable/utils/helper.py +0 -0
  179. {supertable-2.3.4 → supertable-2.3.6}/supertable/utils/profiler.py +0 -0
  180. {supertable-2.3.4 → supertable-2.3.6}/supertable/utils/sql_parser.py +0 -0
  181. {supertable-2.3.4 → supertable-2.3.6}/supertable/utils/tests/test_sql_parser_columns.py +0 -0
  182. {supertable-2.3.4 → supertable-2.3.6}/supertable/utils/timer.py +0 -0
  183. {supertable-2.3.4 → supertable-2.3.6}/supertable.egg-info/SOURCES.txt +0 -0
  184. {supertable-2.3.4 → supertable-2.3.6}/supertable.egg-info/dependency_links.txt +0 -0
  185. {supertable-2.3.4 → supertable-2.3.6}/supertable.egg-info/entry_points.txt +0 -0
  186. {supertable-2.3.4 → supertable-2.3.6}/supertable.egg-info/requires.txt +0 -0
  187. {supertable-2.3.4 → supertable-2.3.6}/supertable.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: supertable
3
- Version: 2.3.4
3
+ Version: 2.3.6
4
4
  Summary: SuperTable — versioned data lake library for SQL analytics on Parquet + Redis.
5
5
  Author: Levente Kupas
6
6
  Author-email: Levente Kupas <lkupas@kladnasoft.com>
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "supertable"
7
- version = "2.3.4"
7
+ version = "2.3.6"
8
8
  description = "SuperTable — versioned data lake library for SQL analytics on Parquet + Redis."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -19,7 +19,7 @@ long_description = readme.read_text(encoding="utf-8") if readme.exists() else ""
19
19
 
20
20
  setup(
21
21
  name="supertable",
22
- version="2.3.4",
22
+ version="2.3.6",
23
23
  description="SuperTable — versioned data lake library for SQL analytics on Parquet + Redis.",
24
24
  long_description=long_description,
25
25
  long_description_content_type="text/markdown",
@@ -25,7 +25,7 @@ See the ``supertable.demo`` package for runnable end-to-end demos and the
25
25
  project documentation for the full API surface.
26
26
  """
27
27
 
28
- __version__ = "2.3.4"
28
+ __version__ = "2.3.6"
29
29
 
30
30
  # Re-export the core public surface so users can do ``from supertable import …``
31
31
  # instead of remembering submodule paths.
@@ -17,6 +17,12 @@ handler.setFormatter(colorlog.ColoredFormatter(
17
17
  logging.basicConfig(level=logging.INFO, handlers=[handler])
18
18
  logger = logging.getLogger(__name__)
19
19
 
20
+ # Quiet noisy third-party HTTP client loggers. At DEBUG these emit one line
21
+ # per request (connection setup + every HEAD/GET/PUT), which drowns out
22
+ # SuperTable's own logs. WARNING keeps genuine connection problems visible.
23
+ for _noisy_logger in ("urllib3", "botocore", "boto3", "s3transfer", "boto"):
24
+ logging.getLogger(_noisy_logger).setLevel(logging.WARNING)
25
+
20
26
  _VALID_LOG_LEVELS = frozenset({"DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"})
21
27
 
22
28
  @dataclass(slots=True)
@@ -0,0 +1,96 @@
1
+ import os
2
+ import sys
3
+ import tempfile
4
+
5
+ from supertable.config.settings import settings
6
+ from supertable.config.defaults import logger
7
+
8
+ # If this file is located in a subdirectory, adjust the path logic as needed.
9
+ # Currently appending ".." from __file__ to add the project root directory
10
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
11
+
12
+ # ---------- lazy home directory resolution ----------
13
+ _resolved_home: str | None = None
14
+
15
+ def _is_writable_dir(path: str) -> bool:
16
+ """Create *path* if needed and verify we can actually write a file in it.
17
+
18
+ ``os.access(..., W_OK)`` is unreliable under containers, ACLs and
19
+ root-squashed mounts, so probe with a real create+unlink: this is the
20
+ difference between a home that merely *resolves* and one DuckDB can root
21
+ its temp/spill, cache and extension dirs under.
22
+ """
23
+ try:
24
+ os.makedirs(path, exist_ok=True)
25
+ with tempfile.NamedTemporaryFile(dir=path):
26
+ pass
27
+ return True
28
+ except OSError:
29
+ return False
30
+
31
+ def _resolve_app_home() -> str:
32
+ """
33
+ Resolve, expand, and normalise the application home directory once.
34
+
35
+ The home must be *writable*, not just resolvable: DuckDB roots its
36
+ temp/spill, external-cache and extension directories here, so a
37
+ non-writable home silently breaks every write (the probe fails with
38
+ ``errno 13`` and falls back to the slow full-read path). We therefore
39
+ verify writability and, when the configured home is not usable, fall back
40
+ to ``<tempdir>/supertable`` with a loud warning rather than returning a
41
+ path that only looks valid.
42
+ """
43
+ global _resolved_home
44
+ if _resolved_home is not None:
45
+ return _resolved_home
46
+
47
+ raw = settings.SUPERTABLE_HOME
48
+ expanded = os.path.abspath(os.path.expanduser(raw))
49
+
50
+ if _is_writable_dir(expanded):
51
+ logger.debug(f"Ensured app home directory exists: {expanded}")
52
+ _resolved_home = expanded
53
+ return _resolved_home
54
+
55
+ fallback = os.path.join(tempfile.gettempdir(), "supertable")
56
+ if _is_writable_dir(fallback):
57
+ logger.warning(
58
+ f"SUPERTABLE_HOME={expanded!r} is not writable; falling back to "
59
+ f"{fallback!r}. Set SUPERTABLE_HOME to a writable directory to "
60
+ f"silence this — DuckDB temp/spill, cache and extensions live under it."
61
+ )
62
+ _resolved_home = fallback
63
+ return _resolved_home
64
+
65
+ raise RuntimeError(
66
+ f"No writable application home: tried SUPERTABLE_HOME={expanded!r} and "
67
+ f"fallback {fallback!r}. Set SUPERTABLE_HOME to a writable directory."
68
+ )
69
+
70
+ def change_to_app_home(home_dir: str | None = None) -> None:
71
+ """
72
+ Attempts to change the current working directory to `home_dir`.
73
+ If home_dir is not provided, uses the resolved app home.
74
+ Logs the outcome.
75
+ """
76
+ target = home_dir if home_dir else _resolve_app_home()
77
+ expanded_dir = os.path.expanduser(target)
78
+ try:
79
+ os.chdir(expanded_dir)
80
+ logger.debug(f"Changed working directory to {expanded_dir}")
81
+ except Exception as e:
82
+ logger.error(f"Failed to change working directory to {expanded_dir}: {e}")
83
+
84
+ # ---------- eager init (preserves original import-time behaviour) ----------
85
+ _app_home = _resolve_app_home()
86
+ change_to_app_home(_app_home)
87
+ logger.debug(f"Current working directory: {os.getcwd()}")
88
+
89
+ # ---------- public API ----------
90
+
91
+ # Kept for backward compatibility; prefer get_app_home() for the expanded path.
92
+ app_home = _app_home
93
+
94
+ def get_app_home() -> str:
95
+ """Return the fully expanded, absolute application home directory."""
96
+ return _resolve_app_home()
@@ -5,6 +5,7 @@ import json
5
5
  import os
6
6
  import time
7
7
  import uuid
8
+ from concurrent.futures import ThreadPoolExecutor
8
9
  from datetime import datetime, timezone
9
10
  import re
10
11
 
@@ -343,8 +344,18 @@ class DataWriter:
343
344
  # layout and tight row-group zonemaps). Together with __rowid__ it
344
345
  # is hidden from query output by the read view's
345
346
  # ``EXCLUDE (__rowid__, __timestamp__)`` projection.
347
+ #
348
+ # System-owned, exactly like __rowid__ above: ALWAYS overwrite any
349
+ # caller-supplied __timestamp__ instead of preserving it. It is a
350
+ # reserved internal column that is both the dedup ORDER BY key (newest
351
+ # per key wins) and the source of the __p_year__/month/day partition
352
+ # derivation (processing.py); letting a caller inject an arbitrary value
353
+ # (wrong dtype, non-UTC, or chosen to game which row wins) would
354
+ # silently corrupt partitioning and dedup. ``newer_than`` is the
355
+ # supported, explicit mechanism for caller-controlled conflict
356
+ # resolution.
346
357
  table_config = self._get_table_config(simple_name)
347
- if not delete_only and "__timestamp__" not in dataframe.columns:
358
+ if not delete_only:
348
359
  dataframe = dataframe.with_columns(
349
360
  polars.lit(datetime.now(timezone.utc)).alias("__timestamp__")
350
361
  )
@@ -371,6 +382,14 @@ class DataWriter:
371
382
  profiler=profiler,
372
383
  )
373
384
  mark("overlap")
385
+ if overwrite_columns:
386
+ _snap_files = len(last_simple_table.get("resources") or [])
387
+ _cand = sum(1 for _, ov, _ in overlapping_files if ov)
388
+ logger.debug(lp(
389
+ f"step[overlap]: {_cand}/{_snap_files} existing file(s) are overwrite "
390
+ f"candidates on {overwrite_columns} "
391
+ f"(snapshot has no per-file key stats → every file is suspect)"
392
+ ))
374
393
 
375
394
  # --- Stats-driven file pruning (consumer 5a) ----------------------
376
395
  # Narrow the overwrite/delete candidate set using the external stats
@@ -387,6 +406,10 @@ class DataWriter:
387
406
  stored_stats_df = load_stats(stats_file, allow_cache=True, profiler=profiler)
388
407
  if stored_stats_df is not None and stored_stats_df.height > 0:
389
408
  probe = probe_ranges_from_df(dataframe, overwrite_columns)
409
+ _probe_desc = {
410
+ c: (f"{v[0]}[{v[1]}..{v[2]}]" if v else "unconstrained(null/unsupported)")
411
+ for c, v in probe.items()
412
+ }
390
413
  before = len(overlapping_files)
391
414
  overlapping_files = prune_overlapping_files_by_stats(
392
415
  overlapping_files,
@@ -395,8 +418,21 @@ class DataWriter:
395
418
  profiler=profiler,
396
419
  )
397
420
  pruned = before - len(overlapping_files)
421
+ logger.debug(lp(
422
+ f"step[stats-prune]: df-probe {_probe_desc} vs {stored_stats_df.height} "
423
+ f"stored stat row(s) → kept {len(overlapping_files)}/{before}, "
424
+ f"pruned {pruned} (no data file opened)"
425
+ ))
398
426
  if pruned > 0:
399
427
  logger.info(lp(f"stats pruning: skipped {pruned}/{before} candidate files"))
428
+ else:
429
+ logger.debug(lp(
430
+ "step[stats-prune]: stats artifact empty → no pruning, all candidates retained"
431
+ ))
432
+ else:
433
+ logger.debug(lp(
434
+ "step[stats-prune]: snapshot has no stats_file → no pruning, all candidates retained"
435
+ ))
400
436
  mark("stats_prune")
401
437
 
402
438
  # File cache: used only by delete_only's identify_all_rowids below.
@@ -420,6 +456,16 @@ class DataWriter:
420
456
  newer_than_col=newer_than,
421
457
  profiler=profiler,
422
458
  )
459
+ mark("resolve_overwrite")
460
+ _counts = profiler.counts
461
+ _fallback = bool(_counts.get("overwrite_resolve_fallback"))
462
+ logger.debug(lp(
463
+ f"step[probe-resolve] via {'polars-fallback' if _fallback else 'duckdb-pushdown'}: "
464
+ f"matched {_counts.get('probe_rows_matched', _counts.get('delete_rows_matched', 0))} "
465
+ f"existing row(s) on {overwrite_columns} → "
466
+ f"{len(resolved_delete_pairs or [])} (file,__rowid__) delete pair(s); "
467
+ f"{dataframe.height}/{pre_filter_count} incoming row(s) survive"
468
+ ))
423
469
  if newer_than:
424
470
  skipped = pre_filter_count - dataframe.height
425
471
  if skipped > 0:
@@ -476,12 +522,21 @@ class DataWriter:
476
522
  # Load the current deletion-vector once: used both to exclude
477
523
  # already-tombstoned rows from this write's deletes (below) and,
478
524
  # via prev_df, to extend the vector without a second read.
525
+ # required=True: a DV that exists but cannot be read must abort
526
+ # the write, never be treated as empty — silently dropping the
527
+ # carried-forward vector would resurrect previously deleted rows.
479
528
  prev_dv_df = (
480
- _read_parquet_safe(prev_tombstone_path, profiler=profiler)
529
+ _read_parquet_safe(prev_tombstone_path, profiler=profiler, required=True)
481
530
  if prev_tombstone_path else None
482
531
  )
532
+ # The rowid set is consumed only by the idempotency filter below,
533
+ # which runs only when this write actually tombstones rows
534
+ # (overwrite or delete_only). Pure appends tombstone nothing, so
535
+ # skip materialising the whole deletion-vector as a Python set —
536
+ # prev_dv_df is still carried forward into build_tombstone_file.
483
537
  prev_dv_rowids = set()
484
- if prev_dv_df is not None and "__rowid__" in prev_dv_df.columns:
538
+ if (overwrite_columns or delete_only) and prev_dv_df is not None \
539
+ and "__rowid__" in prev_dv_df.columns:
485
540
  prev_dv_rowids = set(prev_dv_df.get_column("__rowid__").to_list())
486
541
 
487
542
  # 1. Identify which existing rows this write deletes/replaces.
@@ -515,34 +570,91 @@ class DataWriter:
515
570
  ]
516
571
  deleted = len(new_delete_pairs)
517
572
  mark("identify_deletes")
573
+ logger.debug(lp(
574
+ f"step[deletes]: tombstoning {deleted} live row(s) this write "
575
+ f"(excluded {len(prev_dv_rowids)} row(s) already in the deletion-vector)"
576
+ ))
518
577
 
519
- # 2. Write the incoming rows as a new file (insert/upsert side).
520
- # delete_only carries only predicate columns — nothing to insert.
521
- if not delete_only and dataframe.height > 0:
578
+ # 2. + 3. Write the incoming rows as a new data file (insert/
579
+ # upsert side) AND carry-forward/extend the deletion-vector
580
+ # tombstone file. These two object-store PUTs are independent:
581
+ # neither reads the other's output and they write to disjoint
582
+ # dirs (data/ vs tombstone/), so they run concurrently to
583
+ # overlap the two round-trips. delete_only carries only
584
+ # predicate columns → nothing to insert. No new deletes →
585
+ # build_tombstone reuses the previous file (combined_df=None).
586
+ #
587
+ # Profiler is NOT thread-safe, so each branch records into its
588
+ # own sub-profiler which the parent merges after the join;
589
+ # each branch also measures its own wall time so the per-phase
590
+ # monitoring timings stay meaningful despite the overlap.
591
+ # Footers of files written via the write_bytes path are captured
592
+ # in footer_md_cache so stats extraction (step 6) reuses them
593
+ # instead of re-downloading each freshly-written file.
594
+ footer_md_cache = {}
595
+ tombstone_dir = os.path.join(simple_table.simple_dir, "tombstone")
596
+ do_insert = (not delete_only and dataframe.height > 0)
597
+
598
+ def _write_data_branch():
599
+ sub = Profiler()
600
+ t = time.perf_counter()
522
601
  write_parquet_and_collect_resources(
523
602
  write_df=dataframe,
524
603
  overwrite_columns=[],
525
604
  data_dir=simple_table.data_dir,
526
605
  new_resources=new_resources,
527
606
  compression_level=compression_level,
528
- profiler=profiler,
607
+ profiler=sub,
608
+ footer_md_out=footer_md_cache,
529
609
  )
610
+ return sub, time.perf_counter() - t
611
+
612
+ def _write_tombstone_branch():
613
+ sub = Profiler()
614
+ t = time.perf_counter()
615
+ tp, cdf = build_tombstone_file(
616
+ tombstone_dir=tombstone_dir,
617
+ prev_tombstone_path=prev_tombstone_path,
618
+ new_pairs=new_delete_pairs,
619
+ compression_level=compression_level,
620
+ profiler=sub,
621
+ prev_df=prev_dv_df,
622
+ )
623
+ return tp, cdf, sub, time.perf_counter() - t
624
+
625
+ if do_insert:
626
+ with ThreadPoolExecutor(max_workers=2) as _ex:
627
+ _f_data = _ex.submit(_write_data_branch)
628
+ _f_tomb = _ex.submit(_write_tombstone_branch)
629
+ # .result() re-raises in the parent: a failure in either
630
+ # PUT aborts the write before any snapshot commit, exactly
631
+ # as the former sequential path did (an orphaned immutable
632
+ # file no snapshot references is harmless garbage).
633
+ data_sub, data_secs = _f_data.result()
634
+ tombstone_path, combined_tombstone_df, tomb_sub, tomb_secs = (
635
+ _f_tomb.result()
636
+ )
637
+ profiler.merge(data_sub)
638
+ profiler.merge(tomb_sub)
530
639
  inserted = dataframe.height
531
640
  else:
641
+ tombstone_path, combined_tombstone_df, tomb_sub, tomb_secs = (
642
+ _write_tombstone_branch()
643
+ )
644
+ profiler.merge(tomb_sub)
645
+ data_secs = 0.0
532
646
  inserted = 0
533
- mark("write_parquet")
534
647
 
535
- # 3. Carry forward + extend the deletion-vector tombstone file.
536
- # No new deletes reuse the previous file (combined_df=None).
537
- tombstone_dir = os.path.join(simple_table.simple_dir, "tombstone")
538
- tombstone_path, combined_tombstone_df = build_tombstone_file(
539
- tombstone_dir=tombstone_dir,
540
- prev_tombstone_path=prev_tombstone_path,
541
- new_pairs=new_delete_pairs,
542
- compression_level=compression_level,
543
- profiler=profiler,
544
- prev_df=prev_dv_df,
545
- )
648
+ # Assign the two per-phase timings from each branch's own measured
649
+ # wall time (they overlapped, so the serial mark() deltas would
650
+ # misattribute the time), then advance the mark() baseline.
651
+ timings["write_parquet"] = data_secs
652
+ timings["build_tombstone"] = tomb_secs
653
+ t_last = time.time()
654
+ logger.debug(lp(
655
+ f"step[write]: appended {inserted} incoming row(s) as {len(new_resources)} "
656
+ f"new immutable file(s) (no existing data file rewritten)"
657
+ ))
546
658
 
547
659
  # Track the live deletion-vector row count so meta reads can
548
660
  # deduct dead rows from the physical resource row totals.
@@ -553,7 +665,10 @@ class DataWriter:
553
665
  if combined_tombstone_df is not None
554
666
  else int(last_simple_table.get("tombstone_rows", 0) or 0)
555
667
  )
556
- mark("build_tombstone")
668
+ logger.debug(lp(
669
+ f"step[tombstone]: deletion-vector now {tombstone_rows} row(s) "
670
+ f"({'rewritten' if combined_tombstone_df is not None else 'carried forward unchanged'})"
671
+ ))
557
672
 
558
673
  # 3b. Eager reclamation of fully-dead files. Any existing data
559
674
  # file whose every physical row is now tombstoned is 100%
@@ -698,7 +813,9 @@ class DataWriter:
698
813
  r.get("file") for r in new_resources
699
814
  if isinstance(r, dict) and r.get("file")
700
815
  ]
701
- new_stats_rows = extract_stats_rows(new_data_files, profiler=profiler)
816
+ new_stats_rows = extract_stats_rows(
817
+ new_data_files, profiler=profiler, footer_md_cache=footer_md_cache
818
+ )
702
819
  stats_path, combined_stats_df = build_stats_file(
703
820
  stats_dir=stats_dir,
704
821
  prev_stats_path=last_simple_table.get("stats_file"),
@@ -812,7 +929,7 @@ class DataWriter:
812
929
  schema_json = "{}"
813
930
  _org, _sup = self.super_table.organization, self.super_table.super_name
814
931
  self.catalog.r.set(RK.schema(_org, _sup, simple_name), schema_json)
815
- self.catalog.r.sadd(RK.table_names(_org, _sup), simple_name)
932
+ self.catalog.r.sadd(RK.meta_table_names(_org, _sup), simple_name)
816
933
  except Exception as e:
817
934
  logger.debug(f"[data-writer] schema/table_names Redis write failed: {e}")
818
935
 
@@ -862,7 +979,7 @@ class DataWriter:
862
979
  f"total={total_duration:.3f} | "
863
980
  f"convert={timings.get('convert', 0):.3f} | dedup_ts={timings.get('dedup_ts', 0):.3f} | validate={timings.get('validate', 0):.3f} | "
864
981
  f"lock={timings.get('lock', 0):.3f} | snapshot={timings.get('snapshot', 0):.3f} | "
865
- f"overlap={timings.get('overlap', 0):.3f} | stats_prune={timings.get('stats_prune', 0):.3f} | newer_than={timings.get('newer_than', 0):.3f} | "
982
+ f"overlap={timings.get('overlap', 0):.3f} | stats_prune={timings.get('stats_prune', 0):.3f} | resolve_overwrite={timings.get('resolve_overwrite', 0):.3f} | newer_than={timings.get('newer_than', 0):.3f} | "
866
983
  f"identify_deletes={timings.get('identify_deletes', 0):.3f} | write_parquet={timings.get('write_parquet', 0):.3f} | "
867
984
  f"build_tombstone={timings.get('build_tombstone', 0):.3f} | compact_tombstones={timings.get('compact_tombstones', 0):.3f} | compact_small={timings.get('compact_small', 0):.3f} | build_stats={timings.get('build_stats', 0):.3f} | "
868
985
  f"update_simple={timings.get('update_simple', 0):.3f} | bump_root={timings.get('bump_root', 0):.3f} | "
@@ -1134,8 +1251,17 @@ class DataWriter:
1134
1251
  # the *write* path; compact() is explicit maintenance and always
1135
1252
  # consumes the vector.
1136
1253
  tombstone_path = last_simple_table.get("tombstone")
1254
+ # required=True: a DV that exists but cannot be read must abort the
1255
+ # compaction, never be treated as empty. A swallowed read here would
1256
+ # set should_run_tombstones=False, skipping both Phase A and the
1257
+ # pointer-clear below, so Phase B would carry the dead rows into the
1258
+ # new file while the vector kept pointing at the sunset __file__ —
1259
+ # leaving them permanently unreclaimable. Failing loud leaves the
1260
+ # prior snapshot + vector intact for a retry, and matches the
1261
+ # write-path carry-forward read (required=True) above.
1137
1262
  tombstone_df = (
1138
- _read_parquet_safe(tombstone_path) if tombstone_path else None
1263
+ _read_parquet_safe(tombstone_path, required=True)
1264
+ if tombstone_path else None
1139
1265
  )
1140
1266
  tombstone_rows = (
1141
1267
  tombstone_df.height if tombstone_df is not None else 0
@@ -1199,6 +1325,24 @@ class DataWriter:
1199
1325
  r for r in (list(tomb_new_resources) + list(small_new_resources))
1200
1326
  if r.get("file") not in all_sunset
1201
1327
  ]
1328
+ # ``all_new_resources`` is the full set of files written by THIS
1329
+ # compaction; it feeds stats extraction, the schema model_df and the
1330
+ # result metrics below, all of which need every new file.
1331
+ #
1332
+ # For ``simple_table.update`` it must NOT be reused verbatim, though:
1333
+ # Phase A's outputs were already spliced into
1334
+ # ``last_simple_table["resources"]`` (the in-memory baseline that
1335
+ # ``update`` starts from) right after Phase A ran. ``update`` does
1336
+ # ``(baseline - sunset) + new_resources`` with no dedup, so any
1337
+ # Phase-A output that Phase B did NOT consume (left un-sunset because
1338
+ # it exceeded the ``small_only`` threshold, or its read failed) would
1339
+ # be counted once from the baseline AND once from new_resources —
1340
+ # i.e. the same file listed twice in the new snapshot. Hand ``update``
1341
+ # only Phase B's brand-new files, which are the only resources genuinely
1342
+ # absent from that baseline.
1343
+ update_new_resources = [
1344
+ r for r in small_new_resources if r.get("file") not in all_sunset
1345
+ ]
1202
1346
  result["files_compacted"] = considered
1203
1347
  result["new_resources"] = len(all_new_resources)
1204
1348
  result["sunset_files"] = len(all_sunset)
@@ -1291,7 +1435,7 @@ class DataWriter:
1291
1435
  )
1292
1436
 
1293
1437
  new_snapshot_dict, new_snapshot_path = simple_table.update(
1294
- all_new_resources,
1438
+ update_new_resources,
1295
1439
  all_sunset,
1296
1440
  model_df,
1297
1441
  last_snapshot=last_simple_table,
@@ -14,7 +14,9 @@ from enum import Enum
14
14
 
15
15
  from supertable.config import defaults
16
16
 
17
- logging.getLogger("supertable").setLevel(logging.INFO)
17
+ # Follow the configured SUPERTABLE_LOG_LEVEL (resolved in supertable.config.defaults)
18
+ # instead of hard-pinning INFO, so DEBUG surfaces the detailed write step[...] logs.
19
+ logging.getLogger("supertable").setLevel(defaults.default.LOG_LEVEL)
18
20
 
19
21
  defaults.default.IS_SHOW_TIMING = True
20
22