supertable 2.0.3__tar.gz → 2.0.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (167)
  1. {supertable-2.0.3/supertable.egg-info → supertable-2.0.4}/PKG-INFO +1 -1
  2. {supertable-2.0.3 → supertable-2.0.4}/pyproject.toml +1 -1
  3. {supertable-2.0.3 → supertable-2.0.4}/setup.py +1 -1
  4. {supertable-2.0.3 → supertable-2.0.4}/supertable/__init__.py +1 -1
  5. {supertable-2.0.3 → supertable-2.0.4}/supertable/processing.py +103 -11
  6. supertable-2.0.4/supertable/tests/test_align_to_schema_fix.py +475 -0
  7. {supertable-2.0.3 → supertable-2.0.4}/supertable/tests/test_supertable_all.py +2 -2
  8. {supertable-2.0.3 → supertable-2.0.4/supertable.egg-info}/PKG-INFO +1 -1
  9. {supertable-2.0.3 → supertable-2.0.4}/supertable.egg-info/SOURCES.txt +1 -0
  10. {supertable-2.0.3 → supertable-2.0.4}/LICENSE +0 -0
  11. {supertable-2.0.3 → supertable-2.0.4}/README.md +0 -0
  12. {supertable-2.0.3 → supertable-2.0.4}/requirements.txt +0 -0
  13. {supertable-2.0.3 → supertable-2.0.4}/setup.cfg +0 -0
  14. {supertable-2.0.3 → supertable-2.0.4}/supertable/audit/__init__.py +0 -0
  15. {supertable-2.0.3 → supertable-2.0.4}/supertable/audit/admin.py +0 -0
  16. {supertable-2.0.3 → supertable-2.0.4}/supertable/audit/chain.py +0 -0
  17. {supertable-2.0.3 → supertable-2.0.4}/supertable/audit/consumers.py +0 -0
  18. {supertable-2.0.3 → supertable-2.0.4}/supertable/audit/crypto.py +0 -0
  19. {supertable-2.0.3 → supertable-2.0.4}/supertable/audit/events.py +0 -0
  20. {supertable-2.0.3 → supertable-2.0.4}/supertable/audit/export.py +0 -0
  21. {supertable-2.0.3 → supertable-2.0.4}/supertable/audit/logger.py +0 -0
  22. {supertable-2.0.3 → supertable-2.0.4}/supertable/audit/middleware.py +0 -0
  23. {supertable-2.0.3 → supertable-2.0.4}/supertable/audit/reader.py +0 -0
  24. {supertable-2.0.3 → supertable-2.0.4}/supertable/audit/retention.py +0 -0
  25. {supertable-2.0.3 → supertable-2.0.4}/supertable/audit/tests/__init__.py +0 -0
  26. {supertable-2.0.3 → supertable-2.0.4}/supertable/audit/tests/test_chain.py +0 -0
  27. {supertable-2.0.3 → supertable-2.0.4}/supertable/audit/tests/test_crypto.py +0 -0
  28. {supertable-2.0.3 → supertable-2.0.4}/supertable/audit/tests/test_emit.py +0 -0
  29. {supertable-2.0.3 → supertable-2.0.4}/supertable/audit/tests/test_events.py +0 -0
  30. {supertable-2.0.3 → supertable-2.0.4}/supertable/audit/tests/test_retention.py +0 -0
  31. {supertable-2.0.3 → supertable-2.0.4}/supertable/audit/writer_parquet.py +0 -0
  32. {supertable-2.0.3 → supertable-2.0.4}/supertable/audit/writer_redis.py +0 -0
  33. {supertable-2.0.3 → supertable-2.0.4}/supertable/config/__init__.py +0 -0
  34. {supertable-2.0.3 → supertable-2.0.4}/supertable/config/defaults.py +0 -0
  35. {supertable-2.0.3 → supertable-2.0.4}/supertable/config/homedir.py +0 -0
  36. {supertable-2.0.3 → supertable-2.0.4}/supertable/config/settings.py +0 -0
  37. {supertable-2.0.3 → supertable-2.0.4}/supertable/config/tests/__init__.py +0 -0
  38. {supertable-2.0.3 → supertable-2.0.4}/supertable/config/tests/test_defaults.py +0 -0
  39. {supertable-2.0.3 → supertable-2.0.4}/supertable/config/tests/test_homedir.py +0 -0
  40. {supertable-2.0.3 → supertable-2.0.4}/supertable/config/tests/test_settings.py +0 -0
  41. {supertable-2.0.3 → supertable-2.0.4}/supertable/data_classes.py +0 -0
  42. {supertable-2.0.3 → supertable-2.0.4}/supertable/data_reader.py +0 -0
  43. {supertable-2.0.3 → supertable-2.0.4}/supertable/data_writer.py +0 -0
  44. {supertable-2.0.3 → supertable-2.0.4}/supertable/demo/__init__.py +0 -0
  45. {supertable-2.0.3 → supertable-2.0.4}/supertable/demo/quickstart/__init__.py +0 -0
  46. {supertable-2.0.3 → supertable-2.0.4}/supertable/demo/quickstart/__main__.py +0 -0
  47. {supertable-2.0.3 → supertable-2.0.4}/supertable/demo/quickstart/check_filter_builder.py +0 -0
  48. {supertable-2.0.3 → supertable-2.0.4}/supertable/demo/quickstart/controller.py +0 -0
  49. {supertable-2.0.3 → supertable-2.0.4}/supertable/demo/quickstart/data_writer_helpers.py +0 -0
  50. {supertable-2.0.3 → supertable-2.0.4}/supertable/demo/quickstart/defaults.py +0 -0
  51. {supertable-2.0.3 → supertable-2.0.4}/supertable/demo/quickstart/dummy_data.py +0 -0
  52. {supertable-2.0.3 → supertable-2.0.4}/supertable/demo/quickstart/read_parquet_header.py +0 -0
  53. {supertable-2.0.3 → supertable-2.0.4}/supertable/demo/quickstart/s01_01_01_create_super_table.py +0 -0
  54. {supertable-2.0.3 → supertable-2.0.4}/supertable/demo/quickstart/s01_01_02_enable_mirroring_formats.py +0 -0
  55. {supertable-2.0.3 → supertable-2.0.4}/supertable/demo/quickstart/s01_02_create_roles.py +0 -0
  56. {supertable-2.0.3 → supertable-2.0.4}/supertable/demo/quickstart/s01_03_create_users.py +0 -0
  57. {supertable-2.0.3 → supertable-2.0.4}/supertable/demo/quickstart/s02_01_write_dummy_data.py +0 -0
  58. {supertable-2.0.3 → supertable-2.0.4}/supertable/demo/quickstart/s02_02_write_single_data.py +0 -0
  59. {supertable-2.0.3 → supertable-2.0.4}/supertable/demo/quickstart/s02_03_01_write_staging.py +0 -0
  60. {supertable-2.0.3 → supertable-2.0.4}/supertable/demo/quickstart/s02_03_02_create_pipe.py +0 -0
  61. {supertable-2.0.3 → supertable-2.0.4}/supertable/demo/quickstart/s02_04_01_write_monitoring_simple.py +0 -0
  62. {supertable-2.0.3 → supertable-2.0.4}/supertable/demo/quickstart/s02_04_02_write_monitoring_parallel.py +0 -0
  63. {supertable-2.0.3 → supertable-2.0.4}/supertable/demo/quickstart/s02_05_write_tombstone.py +0 -0
  64. {supertable-2.0.3 → supertable-2.0.4}/supertable/demo/quickstart/s03_01_read_data_error.py +0 -0
  65. {supertable-2.0.3 → supertable-2.0.4}/supertable/demo/quickstart/s03_02_01_read_super_data_ok.py +0 -0
  66. {supertable-2.0.3 → supertable-2.0.4}/supertable/demo/quickstart/s03_02_02_read_table_data_ok.py +0 -0
  67. {supertable-2.0.3 → supertable-2.0.4}/supertable/demo/quickstart/s03_03_read_meta.py +0 -0
  68. {supertable-2.0.3 → supertable-2.0.4}/supertable/demo/quickstart/s03_04_read_staging.py +0 -0
  69. {supertable-2.0.3 → supertable-2.0.4}/supertable/demo/quickstart/s03_06_01_read_roles.py +0 -0
  70. {supertable-2.0.3 → supertable-2.0.4}/supertable/demo/quickstart/s03_06_02_read_user.py +0 -0
  71. {supertable-2.0.3 → supertable-2.0.4}/supertable/demo/quickstart/s03_07_01_estimate_read.py +0 -0
  72. {supertable-2.0.3 → supertable-2.0.4}/supertable/demo/quickstart/s03_07_02_estimate_files.py +0 -0
  73. {supertable-2.0.3 → supertable-2.0.4}/supertable/demo/quickstart/s03_08_read_snapshot_history.py +0 -0
  74. {supertable-2.0.3 → supertable-2.0.4}/supertable/demo/quickstart/s04_01_03_delete_pipe.py +0 -0
  75. {supertable-2.0.3 → supertable-2.0.4}/supertable/demo/quickstart/s05_01_delete_table.py +0 -0
  76. {supertable-2.0.3 → supertable-2.0.4}/supertable/demo/quickstart/s05_02_delete_super_table.py +0 -0
  77. {supertable-2.0.3 → supertable-2.0.4}/supertable/demo/webshop/__init__.py +0 -0
  78. {supertable-2.0.3 → supertable-2.0.4}/supertable/demo/webshop/core.py +0 -0
  79. {supertable-2.0.3 → supertable-2.0.4}/supertable/demo/webshop/defaults.py +0 -0
  80. {supertable-2.0.3 → supertable-2.0.4}/supertable/demo/webshop/generate.py +0 -0
  81. {supertable-2.0.3 → supertable-2.0.4}/supertable/demo/webshop/load.py +0 -0
  82. {supertable-2.0.3 → supertable-2.0.4}/supertable/demo/webshop/topup.py +0 -0
  83. {supertable-2.0.3 → supertable-2.0.4}/supertable/engine/__init__.py +0 -0
  84. {supertable-2.0.3 → supertable-2.0.4}/supertable/engine/data_estimator.py +0 -0
  85. {supertable-2.0.3 → supertable-2.0.4}/supertable/engine/duckdb_lite.py +0 -0
  86. {supertable-2.0.3 → supertable-2.0.4}/supertable/engine/duckdb_pro.py +0 -0
  87. {supertable-2.0.3 → supertable-2.0.4}/supertable/engine/engine_common.py +0 -0
  88. {supertable-2.0.3 → supertable-2.0.4}/supertable/engine/engine_enum.py +0 -0
  89. {supertable-2.0.3 → supertable-2.0.4}/supertable/engine/executor.py +0 -0
  90. {supertable-2.0.3 → supertable-2.0.4}/supertable/engine/plan_stats.py +0 -0
  91. {supertable-2.0.3 → supertable-2.0.4}/supertable/engine/spark_thrift.py +0 -0
  92. {supertable-2.0.3 → supertable-2.0.4}/supertable/engine/tests/__init__.py +0 -0
  93. {supertable-2.0.3 → supertable-2.0.4}/supertable/engine/tests/conftest.py +0 -0
  94. {supertable-2.0.3 → supertable-2.0.4}/supertable/engine/tests/test_dedup_read.py +0 -0
  95. {supertable-2.0.3 → supertable-2.0.4}/supertable/engine/tests/test_engine.py +0 -0
  96. {supertable-2.0.3 → supertable-2.0.4}/supertable/locking/__init__.py +0 -0
  97. {supertable-2.0.3 → supertable-2.0.4}/supertable/locking/benchmarks/__init__.py +0 -0
  98. {supertable-2.0.3 → supertable-2.0.4}/supertable/locking/benchmarks/benchmark_locking.py +0 -0
  99. {supertable-2.0.3 → supertable-2.0.4}/supertable/locking/benchmarks/measure_lock_speed.py +0 -0
  100. {supertable-2.0.3 → supertable-2.0.4}/supertable/locking/benchmarks/measure_lock_time.py +0 -0
  101. {supertable-2.0.3 → supertable-2.0.4}/supertable/locking/file_lock.py +0 -0
  102. {supertable-2.0.3 → supertable-2.0.4}/supertable/locking/redis_lock.py +0 -0
  103. {supertable-2.0.3 → supertable-2.0.4}/supertable/locking/tests/__init__.py +0 -0
  104. {supertable-2.0.3 → supertable-2.0.4}/supertable/locking/tests/test_file_lock.py +0 -0
  105. {supertable-2.0.3 → supertable-2.0.4}/supertable/locking/tests/test_redis_lock.py +0 -0
  106. {supertable-2.0.3 → supertable-2.0.4}/supertable/logging.py +0 -0
  107. {supertable-2.0.3 → supertable-2.0.4}/supertable/meta_reader.py +0 -0
  108. {supertable-2.0.3 → supertable-2.0.4}/supertable/mirroring/__init__.py +0 -0
  109. {supertable-2.0.3 → supertable-2.0.4}/supertable/mirroring/mirror_delta.py +0 -0
  110. {supertable-2.0.3 → supertable-2.0.4}/supertable/mirroring/mirror_formats.py +0 -0
  111. {supertable-2.0.3 → supertable-2.0.4}/supertable/mirroring/mirror_iceberg.py +0 -0
  112. {supertable-2.0.3 → supertable-2.0.4}/supertable/mirroring/mirror_parquet.py +0 -0
  113. {supertable-2.0.3 → supertable-2.0.4}/supertable/monitoring_writer.py +0 -0
  114. {supertable-2.0.3 → supertable-2.0.4}/supertable/plan_extender.py +0 -0
  115. {supertable-2.0.3 → supertable-2.0.4}/supertable/query_plan_manager.py +0 -0
  116. {supertable-2.0.3 → supertable-2.0.4}/supertable/rbac/__init__.py +0 -0
  117. {supertable-2.0.3 → supertable-2.0.4}/supertable/rbac/access_control.py +0 -0
  118. {supertable-2.0.3 → supertable-2.0.4}/supertable/rbac/filter_builder.py +0 -0
  119. {supertable-2.0.3 → supertable-2.0.4}/supertable/rbac/permissions.py +0 -0
  120. {supertable-2.0.3 → supertable-2.0.4}/supertable/rbac/role_manager.py +0 -0
  121. {supertable-2.0.3 → supertable-2.0.4}/supertable/rbac/row_column_security.py +0 -0
  122. {supertable-2.0.3 → supertable-2.0.4}/supertable/rbac/tests/test_filter_builder.py +0 -0
  123. {supertable-2.0.3 → supertable-2.0.4}/supertable/rbac/tests/test_rbac.py +0 -0
  124. {supertable-2.0.3 → supertable-2.0.4}/supertable/rbac/tests/test_rbac_per_table.py +0 -0
  125. {supertable-2.0.3 → supertable-2.0.4}/supertable/rbac/user_manager.py +0 -0
  126. {supertable-2.0.3 → supertable-2.0.4}/supertable/redis_catalog.py +0 -0
  127. {supertable-2.0.3 → supertable-2.0.4}/supertable/redis_connector.py +0 -0
  128. {supertable-2.0.3 → supertable-2.0.4}/supertable/redis_infra.py +0 -0
  129. {supertable-2.0.3 → supertable-2.0.4}/supertable/redis_keys.py +0 -0
  130. {supertable-2.0.3 → supertable-2.0.4}/supertable/service_registry.py +0 -0
  131. {supertable-2.0.3 → supertable-2.0.4}/supertable/simple_table.py +0 -0
  132. {supertable-2.0.3 → supertable-2.0.4}/supertable/staging_area.py +0 -0
  133. {supertable-2.0.3 → supertable-2.0.4}/supertable/storage/__init__.py +0 -0
  134. {supertable-2.0.3 → supertable-2.0.4}/supertable/storage/azure_storage.py +0 -0
  135. {supertable-2.0.3 → supertable-2.0.4}/supertable/storage/gcp_storage.py +0 -0
  136. {supertable-2.0.3 → supertable-2.0.4}/supertable/storage/local_storage.py +0 -0
  137. {supertable-2.0.3 → supertable-2.0.4}/supertable/storage/minio_storage.py +0 -0
  138. {supertable-2.0.3 → supertable-2.0.4}/supertable/storage/s3_storage.py +0 -0
  139. {supertable-2.0.3 → supertable-2.0.4}/supertable/storage/storage_factory.py +0 -0
  140. {supertable-2.0.3 → supertable-2.0.4}/supertable/storage/storage_interface.py +0 -0
  141. {supertable-2.0.3 → supertable-2.0.4}/supertable/storage/tests/test_storage.py +0 -0
  142. {supertable-2.0.3 → supertable-2.0.4}/supertable/super_pipe.py +0 -0
  143. {supertable-2.0.3 → supertable-2.0.4}/supertable/super_table.py +0 -0
  144. {supertable-2.0.3 → supertable-2.0.4}/supertable/tests/__init__.py +0 -0
  145. {supertable-2.0.3 → supertable-2.0.4}/supertable/tests/test_data_reader.py +0 -0
  146. {supertable-2.0.3 → supertable-2.0.4}/supertable/tests/test_data_writer.py +0 -0
  147. {supertable-2.0.3 → supertable-2.0.4}/supertable/tests/test_data_writer_comprehensive.py +0 -0
  148. {supertable-2.0.3 → supertable-2.0.4}/supertable/tests/test_data_writer_tombstones.py +0 -0
  149. {supertable-2.0.3 → supertable-2.0.4}/supertable/tests/test_dedup_on_read_write.py +0 -0
  150. {supertable-2.0.3 → supertable-2.0.4}/supertable/tests/test_meta_reader.py +0 -0
  151. {supertable-2.0.3 → supertable-2.0.4}/supertable/tests/test_newer_than.py +0 -0
  152. {supertable-2.0.3 → supertable-2.0.4}/supertable/tests/test_process_delete_only.py +0 -0
  153. {supertable-2.0.3 → supertable-2.0.4}/supertable/tests/test_processing.py +0 -0
  154. {supertable-2.0.3 → supertable-2.0.4}/supertable/tests/test_query_sql.py +0 -0
  155. {supertable-2.0.3 → supertable-2.0.4}/supertable/tests/test_redis_key_prefix.py +0 -0
  156. {supertable-2.0.3 → supertable-2.0.4}/supertable/tests/test_simple_table.py +0 -0
  157. {supertable-2.0.3 → supertable-2.0.4}/supertable/tests/test_small_file_compaction.py +0 -0
  158. {supertable-2.0.3 → supertable-2.0.4}/supertable/tests/test_super_table.py +0 -0
  159. {supertable-2.0.3 → supertable-2.0.4}/supertable/utils/__init__.py +0 -0
  160. {supertable-2.0.3 → supertable-2.0.4}/supertable/utils/helper.py +0 -0
  161. {supertable-2.0.3 → supertable-2.0.4}/supertable/utils/sql_parser.py +0 -0
  162. {supertable-2.0.3 → supertable-2.0.4}/supertable/utils/tests/test_sql_parser_columns.py +0 -0
  163. {supertable-2.0.3 → supertable-2.0.4}/supertable/utils/timer.py +0 -0
  164. {supertable-2.0.3 → supertable-2.0.4}/supertable.egg-info/dependency_links.txt +0 -0
  165. {supertable-2.0.3 → supertable-2.0.4}/supertable.egg-info/entry_points.txt +0 -0
  166. {supertable-2.0.3 → supertable-2.0.4}/supertable.egg-info/requires.txt +0 -0
  167. {supertable-2.0.3 → supertable-2.0.4}/supertable.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: supertable
3
- Version: 2.0.3
3
+ Version: 2.0.4
4
4
  Summary: SuperTable — versioned data lake library for SQL analytics on Parquet + Redis.
5
5
  Author: Levente Kupas
6
6
  Author-email: Levente Kupas <lkupas@kladnasoft.com>
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "supertable"
7
- version = "2.0.3"
7
+ version = "2.0.4"
8
8
  description = "SuperTable — versioned data lake library for SQL analytics on Parquet + Redis."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -19,7 +19,7 @@ long_description = readme.read_text(encoding="utf-8") if readme.exists() else ""
19
19
 
20
20
  setup(
21
21
  name="supertable",
22
- version="2.0.3",
22
+ version="2.0.4",
23
23
  description="SuperTable — versioned data lake library for SQL analytics on Parquet + Redis.",
24
24
  long_description=long_description,
25
25
  long_description_content_type="text/markdown",
@@ -25,7 +25,7 @@ See the ``supertable.demo`` package for runnable end-to-end demos and the
25
25
  project documentation for the full API surface.
26
26
  """
27
27
 
28
- __version__ = "2.0.3"
28
+ __version__ = "2.0.4"
29
29
 
30
30
  # Re-export the core public surface so users can do ``from supertable import …``
31
31
  # instead of remembering submodule paths.
@@ -75,37 +75,129 @@ def _resolve_unified_dtype(dtypes: Set[polars.DataType]) -> polars.DataType:
75
75
  return polars.Utf8
76
76
 
77
77
 
78
- def _union_schema(a: polars.DataFrame, b: polars.DataFrame) -> Dict[str, polars.DataType]:
79
- cols: List[str] = list(dict.fromkeys(a.columns + b.columns))
78
+ def _union_schema_many(frames: List[polars.DataFrame]) -> Dict[str, polars.DataType]:
79
+ """Build a unified column-name dtype mapping across N dataframes.
80
+
81
+ The output dict preserves first-appearance order: a column that first
82
+ appears in frame *i* takes position determined by frame *i*'s own order
83
+ relative to columns that appeared earlier. Dtypes are widened via
84
+ ``_resolve_unified_dtype`` over the set of dtypes the column carries
85
+ across all frames that contain it.
86
+ """
87
+ seen: Set[str] = set()
88
+ cols: List[str] = []
89
+ for f in frames:
90
+ for c in f.columns:
91
+ if c not in seen:
92
+ seen.add(c)
93
+ cols.append(c)
80
94
  target: Dict[str, polars.DataType] = {}
81
95
  for c in cols:
82
96
  types: Set[polars.DataType] = set()
83
- if c in a.columns:
84
- types.add(a[c].dtype)
85
- if c in b.columns:
86
- types.add(b[c].dtype)
97
+ for f in frames:
98
+ if c in f.columns:
99
+ types.add(f[c].dtype)
87
100
  target[c] = _resolve_unified_dtype(types)
88
101
  return target
89
102
 
90
103
 
104
+ def _union_schema(a: polars.DataFrame, b: polars.DataFrame) -> Dict[str, polars.DataType]:
105
+ return _union_schema_many([a, b])
106
+
107
+
91
108
  def _align_to_schema(df: polars.DataFrame, target_schema: Dict[str, polars.DataType]) -> polars.DataFrame:
92
- exprs = []
109
+ """Project *df* into *target_schema*: same column names, same order, same dtypes.
110
+
111
+ For every column in *target_schema*:
112
+ - present in *df* with the target dtype → keep the existing series
113
+ - present in *df* with a different dtype → cast (strict=False, so unconvertible values become null)
114
+ - absent in *df* → fill with a typed null literal
115
+
116
+ The resulting frame's column order is **exactly** ``list(target_schema.keys())``.
117
+ This is the contract callers like :func:`concat_with_union` rely on:
118
+ ``polars.concat(..., how="vertical_relaxed")`` aligns frames *positionally*,
119
+ so it requires identical names at identical positions.
120
+
121
+ Implementation note: ``df.select(exprs)`` is used (not ``with_columns``).
122
+ ``with_columns`` preserves the input frame's column order and appends new
123
+ columns at the end, which silently breaks the positional-concat contract
124
+ when *df*'s order disagrees with *target_schema*'s order.
125
+ """
126
+ if not target_schema:
127
+ return df
128
+ # Zero-row defence: ``df.select([pl.lit(None), ...])`` on an empty frame
129
+ # broadcasts the literal to a single null row, which would silently turn
130
+ # a 0-row input into a 1-row output. Materialise an explicit empty frame
131
+ # with the target schema instead.
132
+ if df.height == 0:
133
+ return polars.DataFrame(schema=target_schema)
134
+ exprs: List[polars.Expr] = []
93
135
  for col, dtype in target_schema.items():
94
136
  if col in df.columns:
95
- if df[col].dtype != dtype:
137
+ if df.schema[col] != dtype:
96
138
  exprs.append(polars.col(col).cast(dtype, strict=False))
139
+ else:
140
+ exprs.append(polars.col(col))
97
141
  else:
98
142
  exprs.append(polars.lit(None, dtype=dtype).alias(col))
99
- return df.with_columns(exprs) if exprs else df
143
+ return df.select(exprs)
100
144
 
101
145
 
102
146
  def concat_with_union(a: polars.DataFrame, b: polars.DataFrame) -> polars.DataFrame:
147
+ """Vertically concatenate two frames with a unified schema.
148
+
149
+ Computes the union of *a*'s and *b*'s schemas, aligns both frames to it
150
+ (filling missing columns with nulls and widening conflicting dtypes), and
151
+ then concatenates positionally. After the union both frames have
152
+ identical columns in identical positions, so the concat cannot fail with
153
+ ``schema names differ``.
154
+ """
103
155
  if a.height == 0:
104
156
  return b
105
157
  if b.height == 0:
106
158
  return a
107
- target = _union_schema(a, b)
108
- return polars.concat([_align_to_schema(a, target), _align_to_schema(b, target)], how="vertical_relaxed")
159
+ target = _union_schema_many([a, b])
160
+ return polars.concat(
161
+ [_align_to_schema(a, target), _align_to_schema(b, target)],
162
+ how="vertical_relaxed",
163
+ )
164
+
165
+
166
+ def concat_many_with_union(frames: List[polars.DataFrame]) -> polars.DataFrame:
167
+ """Vertically concatenate N frames with a single unified schema.
168
+
169
+ Equivalent to repeated :func:`concat_with_union` but computes the union
170
+ schema once across all inputs (rather than re-deriving it pairwise), and
171
+ issues a single ``polars.concat``. Use this when merging an arbitrary
172
+ set of parquet files with potentially different / dynamic column sets
173
+ (e.g. GA4-style ``param_*`` dynamic columns where each batch contains a
174
+ different subset of keys).
175
+
176
+ Semantics:
177
+ - Empty frames are skipped.
178
+ - If all frames are empty, an empty frame with the union schema is returned.
179
+ - If no frames are given, an empty zero-column frame is returned.
180
+
181
+ Note on memory: this materialises every input frame in memory at once.
182
+ For memory-bounded streaming compaction, callers should still iterate
183
+ with chunked flushes via :func:`concat_with_union` — this helper is for
184
+ callers that already have all frames in memory.
185
+ """
186
+ if not frames:
187
+ return polars.DataFrame()
188
+ non_empty = [f for f in frames if f.height > 0]
189
+ if not non_empty:
190
+ # All inputs are empty — return an empty frame carrying the union schema
191
+ target = _union_schema_many(frames)
192
+ return polars.DataFrame(schema=target)
193
+ if len(non_empty) == 1:
194
+ # Still project to its own schema explicitly so the output dtype map is
195
+ # the same shape as the multi-frame path (callers can rely on it).
196
+ target = _union_schema_many(non_empty)
197
+ return _align_to_schema(non_empty[0], target)
198
+ target = _union_schema_many(non_empty)
199
+ aligned = [_align_to_schema(f, target) for f in non_empty]
200
+ return polars.concat(aligned, how="vertical_relaxed")
109
201
 
110
202
 
111
203
  # =========================
@@ -0,0 +1,475 @@
1
+ """
2
+ Regression and edge-case tests for the column-order bug in
3
+ ``supertable.processing._align_to_schema``.
4
+
5
+ # Background
6
+
7
+ The original implementation projected via ``df.with_columns(exprs)``, which
8
+ preserves *df*'s original column order and appends new columns at the end.
9
+ That violated the precondition of the subsequent
10
+ ``polars.concat(..., how="vertical_relaxed")`` call (which matches columns
11
+ *positionally*), causing production failures of the form::
12
+
13
+ polars.exceptions.ComputeError:
14
+ schema names differ: got param_chosen_payment_method, expected param_content
15
+
16
+ The failure is **structural**: any time two parquet files in the same
17
+ compaction have different column subsets (or different orderings), the
18
+ positional concat would explode.
19
+
20
+ The fix re-implements ``_align_to_schema`` with ``df.select(exprs)`` so the
21
+ output column order is **always** ``list(target_schema.keys())``, regardless
22
+ of what *df*'s own column order happened to be. A new
23
+ ``concat_many_with_union`` helper is also tested here — it computes the union
24
+ schema once across N frames and aligns/concats in a single pass (useful when
25
+ merging many parquet files with dynamic column sets, e.g. GA4 ``param_*``).
26
+
27
+ # What these tests pin
28
+
29
+ 1. _align_to_schema returns columns in **exact** target order (regression).
30
+ 2. concat_with_union succeeds when both frames have different column subsets,
31
+ including the exact failure pattern observed in production.
32
+ 3. concat_many_with_union behaves like an N-way fold of concat_with_union
33
+ but with a single schema-union pass.
34
+ 4. Type widening, null filling, and empty-frame handling work as documented.
35
+ 5. The loop pattern used inside ``process_files_with_overlap`` /
36
+ ``process_files_without_overlap`` (repeated pairwise concat) no longer
37
+ fails on schema-divergent inputs.
38
+ """
39
+
40
+ from __future__ import annotations
41
+
42
+ import os
43
+ import random
44
+ from typing import List
45
+
46
+ import polars as pl
47
+ import pytest
48
+
49
+ # Make sure environment is set up before importing the package, mirroring
50
+ # the convention used in test_redis_key_prefix.py.
51
+ os.environ.setdefault("SUPERTABLE_ORGANIZATION", "test_org")
52
+ os.environ.setdefault("SUPERTABLE_SUPERUSER_TOKEN", "test_token")
53
+
54
+ from supertable.processing import ( # noqa: E402
55
+ _align_to_schema,
56
+ _union_schema,
57
+ _union_schema_many,
58
+ concat_with_union,
59
+ concat_many_with_union,
60
+ )
61
+
62
+
63
+ # ---------------------------------------------------------------------------
64
+ # Helpers
65
+ # ---------------------------------------------------------------------------
66
+
67
+ def _df(**cols) -> pl.DataFrame:
68
+ return pl.DataFrame(cols)
69
+
70
+
71
+ # ===========================================================================
72
+ # 1. _align_to_schema column-order invariant
73
+ # ===========================================================================
74
+
75
+ class TestAlignToSchemaColumnOrder:
76
+ """The post-condition: result.columns == list(target_schema.keys())."""
77
+
78
+ def test_output_order_matches_target_when_columns_match(self):
79
+ df = _df(b=[1], a=[2]) # df order: [b, a]
80
+ target = {"a": pl.Int64, "b": pl.Int64} # target order: [a, b]
81
+ result = _align_to_schema(df, target)
82
+ assert result.columns == ["a", "b"]
83
+
84
+ def test_output_order_matches_target_when_missing_columns(self):
85
+ df = _df(b=[1])
86
+ target = {"a": pl.Utf8, "b": pl.Int64}
87
+ result = _align_to_schema(df, target)
88
+ assert result.columns == ["a", "b"]
89
+ assert result["a"].to_list() == [None]
90
+
91
+ def test_output_order_matches_target_when_subset(self):
92
+ df = _df(a=[1], b=[2], c=[3])
93
+ target = {"b": pl.Int64, "a": pl.Int64} # target picks only b, a
94
+ result = _align_to_schema(df, target)
95
+ assert result.columns == ["b", "a"]
96
+ # Column 'c' is dropped because target doesn't include it
97
+ assert "c" not in result.columns
98
+
99
+ def test_data_preserved_under_reordering(self):
100
+ df = _df(b=[10, 20], a=[1, 2])
101
+ target = {"a": pl.Int64, "b": pl.Int64}
102
+ result = _align_to_schema(df, target)
103
+ assert result["a"].to_list() == [1, 2]
104
+ assert result["b"].to_list() == [10, 20]
105
+
106
+ def test_empty_target_returns_df_unchanged(self):
107
+ df = _df(a=[1])
108
+ result = _align_to_schema(df, {})
109
+ assert result.equals(df)
110
+
111
+ def test_dtype_cast_applied_and_order_kept(self):
112
+ df = pl.DataFrame({"x": pl.Series([1, 2], dtype=pl.Int32), "y": [3, 4]})
113
+ target = {"y": pl.Int64, "x": pl.Float64}
114
+ result = _align_to_schema(df, target)
115
+ assert result.columns == ["y", "x"]
116
+ assert result["x"].dtype == pl.Float64
117
+ assert result["y"].dtype == pl.Int64
118
+
119
+ def test_all_missing_columns(self):
120
+ df = pl.DataFrame(schema={"existing": pl.Int64})
121
+ target = {"new1": pl.Utf8, "new2": pl.Float64}
122
+ result = _align_to_schema(df, target)
123
+ assert result.columns == ["new1", "new2"]
124
+ # Original frame had zero rows → result also has zero rows
125
+ assert result.height == 0
126
+
127
+
128
+ # ===========================================================================
129
+ # 2. concat_with_union — production-bug reproduction
130
+ # ===========================================================================
131
+
132
+ class TestConcatWithUnionProductionBug:
133
+ """The exact failure pattern seen in GA4 compaction logs."""
134
+
135
+ def test_disjoint_columns_with_alphabetical_a(self):
136
+ # Frame 'a' has both columns in alphabetical order (the engine's
137
+ # deterministic dump order). Frame 'b' is missing the first one.
138
+ a = _df(param_chosen_payment_method=["paypal"], param_content=["home"])
139
+ b = _df(param_content=["detail", "list"])
140
+ result = concat_with_union(a, b)
141
+ assert result.height == 3
142
+ assert set(result.columns) == {"param_chosen_payment_method", "param_content"}
143
+
144
+ def test_disjoint_columns_with_alphabetical_b(self):
145
+ # Symmetric: now 'b' is the one carrying the wider schema.
146
+ a = _df(param_content=["home", "detail"])
147
+ b = _df(param_chosen_payment_method=["paypal"], param_content=["list"])
148
+ result = concat_with_union(a, b)
149
+ assert result.height == 3
150
+ assert set(result.columns) == {"param_chosen_payment_method", "param_content"}
151
+
152
+ def test_each_frame_has_exclusive_column(self):
153
+ # Each frame contributes a column the other lacks.
154
+ a = _df(only_in_a=["x"], shared=["s1"])
155
+ b = _df(shared=["s2"], only_in_b=["y"])
156
+ result = concat_with_union(a, b)
157
+ assert result.height == 2
158
+ assert set(result.columns) == {"only_in_a", "shared", "only_in_b"}
159
+
160
+ def test_existing_frame_has_reverse_order(self):
161
+ # 'a' has cols [x, y]; 'b' has the same cols but in order [y, x].
162
+ # Without the fix, both would survive _align_to_schema unchanged,
163
+ # and the positional concat would fail.
164
+ a = _df(x=[1], y=[2])
165
+ b = _df(y=[3], x=[4])
166
+ result = concat_with_union(a, b)
167
+ assert result.height == 2
168
+ assert result["x"].to_list() == [1, 4]
169
+ assert result["y"].to_list() == [2, 3]
170
+
171
+ def test_concat_with_union_is_order_independent(self):
172
+ # The result of (a + b) and (b + a) should contain the same rows
173
+ # (in different vertical orders, but with the same column set).
174
+ a = _df(only_in_a=["x"], shared=["s1"])
175
+ b = _df(shared=["s2"], only_in_b=["y"])
176
+ r1 = concat_with_union(a, b)
177
+ r2 = concat_with_union(b, a)
178
+ assert set(r1.columns) == set(r2.columns)
179
+ # Stack rows and compare as sets (duplicate rows collapse)
180
+ r1_rows = set(map(tuple, r1.select(sorted(r1.columns)).rows()))
181
+ r2_rows = set(map(tuple, r2.select(sorted(r2.columns)).rows()))
182
+ assert r1_rows == r2_rows
183
+
184
+
185
+ # ===========================================================================
186
+ # 3. concat_with_union — empty / single-frame edge cases
187
+ # ===========================================================================
188
+
189
+ class TestConcatWithUnionEdgeCases:
190
+
191
+ def test_empty_a_returns_b(self):
192
+ a = pl.DataFrame(schema={"id": pl.Int64})
193
+ b = _df(id=[1, 2])
194
+ result = concat_with_union(a, b)
195
+ assert result.height == 2
196
+
197
+ def test_empty_b_returns_a(self):
198
+ a = _df(id=[1, 2])
199
+ b = pl.DataFrame(schema={"id": pl.Int64})
200
+ result = concat_with_union(a, b)
201
+ assert result.height == 2
202
+
203
+ def test_both_empty(self):
204
+ a = pl.DataFrame(schema={"id": pl.Int64})
205
+ b = pl.DataFrame(schema={"id": pl.Int64})
206
+ result = concat_with_union(a, b)
207
+ assert result.height == 0
208
+
209
+ def test_type_widening_int_float(self):
210
+ a = pl.DataFrame({"x": pl.Series([1], dtype=pl.Int32)})
211
+ b = pl.DataFrame({"x": pl.Series([2.5], dtype=pl.Float64)})
212
+ result = concat_with_union(a, b)
213
+ assert result.height == 2
214
+ assert result["x"].dtype == pl.Float64
215
+
216
+ def test_type_widening_int_string(self):
217
+ a = pl.DataFrame({"x": pl.Series([1], dtype=pl.Int64)})
218
+ b = pl.DataFrame({"x": pl.Series(["hello"], dtype=pl.Utf8)})
219
+ result = concat_with_union(a, b)
220
+ assert result.height == 2
221
+ assert result["x"].dtype == pl.Utf8
222
+ # The Int64 value 1 is cast with strict=False; an int → str cast succeeds.
223
+ assert result["x"].to_list() == ["1", "hello"]
224
+
225
+
226
+ # ===========================================================================
227
+ # 4. _union_schema_many — preserves first-appearance order
228
+ # ===========================================================================
229
+
230
+ class TestUnionSchemaMany:
231
+
232
+ def test_single_frame(self):
233
+ f = _df(a=[1], b=[2])
234
+ target = _union_schema_many([f])
235
+ assert list(target.keys()) == ["a", "b"]
236
+
237
+ def test_two_frames_first_appearance_order(self):
238
+ a = _df(x=[1], y=[2])
239
+ b = _df(y=[3], z=[4])
240
+ target = _union_schema_many([a, b])
241
+ # x is from a, y from both (a wins), z from b → [x, y, z]
242
+ assert list(target.keys()) == ["x", "y", "z"]
243
+
244
+ def test_three_frames_first_appearance_order(self):
245
+ f1 = _df(c=[1])
246
+ f2 = _df(a=[2], c=[3])
247
+ f3 = _df(b=[4])
248
+ target = _union_schema_many([f1, f2, f3])
249
+ # c first appears in f1 → first; a from f2 → second; b from f3 → third
250
+ assert list(target.keys()) == ["c", "a", "b"]
251
+
252
+ def test_dtype_widening_across_three_frames(self):
253
+ f1 = pl.DataFrame({"x": pl.Series([1], dtype=pl.Int8)})
254
+ f2 = pl.DataFrame({"x": pl.Series([2], dtype=pl.Int32)})
255
+ f3 = pl.DataFrame({"x": pl.Series([3], dtype=pl.Int64)})
256
+ target = _union_schema_many([f1, f2, f3])
257
+ # All ints widen to Int64
258
+ assert target["x"] == pl.Int64
259
+
260
+ def test_dtype_widening_to_utf8_across_three_frames(self):
261
+ f1 = pl.DataFrame({"x": pl.Series([1], dtype=pl.Int64)})
262
+ f2 = pl.DataFrame({"x": pl.Series([1.0], dtype=pl.Float64)})
263
+ f3 = pl.DataFrame({"x": pl.Series(["s"], dtype=pl.Utf8)})
264
+ target = _union_schema_many([f1, f2, f3])
265
+ assert target["x"] == pl.Utf8
266
+
267
+ def test_empty_list_returns_empty_schema(self):
268
+ target = _union_schema_many([])
269
+ assert target == {}
270
+
271
+ def test_pairwise_helper_matches_many(self):
272
+ a = _df(x=[1], y=[2])
273
+ b = _df(y=[3], z=[4])
274
+ assert _union_schema(a, b) == _union_schema_many([a, b])
275
+
276
+
277
+ # ===========================================================================
278
+ # 5. concat_many_with_union — N-way merge
279
+ # ===========================================================================
280
+
281
+ class TestConcatManyWithUnion:
282
+
283
+ def test_three_frames_disjoint_columns(self):
284
+ f1 = _df(a=[1])
285
+ f2 = _df(b=[2])
286
+ f3 = _df(c=[3])
287
+ result = concat_many_with_union([f1, f2, f3])
288
+ assert result.height == 3
289
+ assert set(result.columns) == {"a", "b", "c"}
290
+
291
+ def test_three_frames_dynamic_columns_ga4_style(self):
292
+ # Each "parquet" has a different subset of param_* columns,
293
+ # in alphabetical-by-engine order. This mirrors the production
294
+ # workload that triggered the bug.
295
+ f_purchase = _df(
296
+ event_id=[1, 2],
297
+ param_chosen_payment_method=["paypal", "card"],
298
+ param_content=["home", "detail"],
299
+ param_value=[10.0, 20.0],
300
+ )
301
+ f_view = _df(
302
+ event_id=[3, 4, 5],
303
+ param_content=["list", "detail", "home"],
304
+ param_page_location=["/a", "/b", "/c"],
305
+ )
306
+ f_click = _df(
307
+ event_id=[6],
308
+ param_button_id=["x"],
309
+ param_content=["banner"],
310
+ )
311
+ result = concat_many_with_union([f_purchase, f_view, f_click])
312
+ assert result.height == 6
313
+ # All param_* columns from all frames present
314
+ for col in (
315
+ "event_id",
316
+ "param_chosen_payment_method",
317
+ "param_content",
318
+ "param_value",
319
+ "param_page_location",
320
+ "param_button_id",
321
+ ):
322
+ assert col in result.columns
323
+ # Sanity: param_content fully populated (all three frames had it)
324
+ assert result["param_content"].null_count() == 0
325
+ # Sanity: param_button_id null everywhere except the click frame
326
+ assert result["param_button_id"].null_count() == 5
327
+
328
+ def test_all_empty_frames_returns_empty_with_union_schema(self):
329
+ f1 = pl.DataFrame(schema={"a": pl.Int64})
330
+ f2 = pl.DataFrame(schema={"b": pl.Utf8})
331
+ result = concat_many_with_union([f1, f2])
332
+ assert result.height == 0
333
+ assert set(result.columns) == {"a", "b"}
334
+
335
+ def test_no_frames_returns_empty_zero_column(self):
336
+ result = concat_many_with_union([])
337
+ assert result.height == 0
338
+ assert result.columns == []
339
+
340
+ def test_single_frame_returns_projected_copy(self):
341
+ f = _df(a=[1, 2], b=[3, 4])
342
+ result = concat_many_with_union([f])
343
+ assert result.height == 2
344
+ assert result.columns == ["a", "b"]
345
+
346
+ def test_skips_empty_in_middle(self):
347
+ f1 = _df(a=[1])
348
+ f2 = pl.DataFrame(schema={"a": pl.Int64, "b": pl.Utf8}) # empty
349
+ f3 = _df(a=[2], b=["x"])
350
+ result = concat_many_with_union([f1, f2, f3])
351
+ # Empty frame is skipped → no NULL row introduced
352
+ assert result.height == 2
353
+ assert set(result.columns) == {"a", "b"}
354
+
355
+ def test_equivalent_to_repeated_concat_with_union(self):
356
+ # The same frames merged via N-way must yield the same rows as
357
+ # progressive pairwise application (the pattern used inside
358
+ # process_files_with_overlap / process_files_without_overlap).
359
+ frames = [
360
+ _df(a=[1], b=["x"]),
361
+ _df(b=["y"], c=[2.0]),
362
+ _df(a=[3], c=[4.0], d=[True]),
363
+ ]
364
+ n_way = concat_many_with_union(frames)
365
+ pairwise = frames[0]
366
+ for f in frames[1:]:
367
+ pairwise = concat_with_union(pairwise, f)
368
+ # Same column set
369
+ assert set(n_way.columns) == set(pairwise.columns)
370
+ # Same set of rows once both results are projected onto sorted column order
371
+ n_rows = set(map(tuple, n_way.select(sorted(n_way.columns)).rows()))
372
+ p_rows = set(map(tuple, pairwise.select(sorted(pairwise.columns)).rows()))
373
+ assert n_rows == p_rows
374
+
375
+
376
+ # ===========================================================================
377
+ # 6. Cascading pairwise merge — simulates the in-codebase loop
378
+ # ===========================================================================
379
+
380
+ class TestCascadingPairwiseMerge:
381
+ """The streaming-flush callers (process_files_with_overlap etc.) use a
382
+ loop ``merged = concat_with_union(merged, file_i)``. This pattern must
383
+ work for arbitrary file orderings and column-subset combinations."""
384
+
385
+ def test_five_files_random_column_subsets(self):
386
+ rng = random.Random(42)
387
+ all_columns = ["event_id", "param_a", "param_b", "param_c", "param_d", "param_e"]
388
+ # Build 5 frames each with a random subset of param_* columns + event_id
389
+ frames: List[pl.DataFrame] = []
390
+ for i in range(5):
391
+ subset = ["event_id"] + rng.sample(
392
+ ["param_a", "param_b", "param_c", "param_d", "param_e"],
393
+ k=rng.randint(1, 5),
394
+ )
395
+ # Permute column order so different frames hit different branches
396
+ # of _align_to_schema
397
+ rng.shuffle(subset)
398
+ data = {c: [f"v{i}_{c}"] if c != "event_id" else [i] for c in subset}
399
+ frames.append(pl.DataFrame(data))
400
+
401
+ # Fold via the in-code loop pattern
402
+ merged = frames[0]
403
+ for f in frames[1:]:
404
+ merged = concat_with_union(merged, f)
405
+
406
+ assert merged.height == 5
407
+ assert set(merged.columns) == set(all_columns)
408
+
409
+ def test_loop_order_does_not_change_result(self):
410
+ # Three frames; merge in two different orders. Should yield equal
411
+ # row multisets.
412
+ from collections import Counter
413
+
414
+ a = _df(x=[1], y=["a"])
415
+ b = _df(y=["b"], z=[2.0])
416
+ c = _df(x=[3], z=[4.0])
417
+
418
+ forward = concat_with_union(concat_with_union(a, b), c)
419
+ reverse = concat_with_union(concat_with_union(c, b), a)
420
+
421
+ cols = sorted(forward.columns)
422
+ # Use Counter (multiset) rather than sorted(): rows contain None
423
+ # values which are unorderable, but they are hashable so a multiset
424
+ # comparison works.
425
+ f_rows = Counter(map(tuple, forward.select(cols).rows()))
426
+ r_rows = Counter(map(tuple, reverse.select(cols).rows()))
427
+ assert f_rows == r_rows
428
+
429
+
430
+ # ===========================================================================
431
+ # 7. Production-error regression — pin the exact failure pattern
432
+ # ===========================================================================
433
+
434
+ class TestProductionErrorRegression:
435
+ """Reproduce the exact symptom from the GA4 compaction logs.
436
+
437
+ Before the fix this test would raise::
438
+
439
+ polars.exceptions.ComputeError:
440
+ schema names differ: got param_chosen_payment_method, expected param_content
441
+
442
+ After the fix it must complete cleanly.
443
+ """
444
+
445
+ def test_no_schema_names_differ_error(self):
446
+ # Composite schema: many shared columns + one column that exists in
447
+ # 'a' only (mimicking a purchase-event slice that has payment_method
448
+ # while a view-only slice does not).
449
+ a = _df(
450
+ event_id=[1],
451
+ event_name=["purchase"],
452
+ param_chosen_payment_method=["paypal"],
453
+ param_content=["home"],
454
+ param_currency=["EUR"],
455
+ param_value=[42.0],
456
+ )
457
+ b = _df(
458
+ event_id=[2, 3],
459
+ event_name=["page_view", "page_view"],
460
+ param_content=["list", "detail"],
461
+ param_currency=[None, None],
462
+ param_value=[None, None],
463
+ )
464
+
465
+ # Must not raise
466
+ result = concat_with_union(a, b)
467
+
468
+ assert result.height == 3
469
+ assert "param_chosen_payment_method" in result.columns
470
+ # Rows from 'b' must have null for the column that didn't exist in 'b'
471
+ # (we don't depend on row ordering after concat — filter instead)
472
+ b_rows_with_null_pcm = result.filter(
473
+ pl.col("event_name") == "page_view"
474
+ )["param_chosen_payment_method"].null_count()
475
+ assert b_rows_with_null_pcm == 2
@@ -211,8 +211,8 @@ class TestConcatWithUnion:
211
211
 
212
212
  def test_union_different_schemas(self):
213
213
  from supertable.processing import concat_with_union
214
- # Both share 'x' as first column; each has a unique extra column.
215
- # _align_to_schema adds missing cols at the end, so order stays compatible.
214
+ # Each frame contributes a different column subset; _align_to_schema
215
+ # must project both into the union schema before the positional concat.
216
216
  a = _polars_df({"x": [1], "y": ["a"]})
217
217
  b = _polars_df({"x": [2], "y": [None], "z": [3.0]})
218
218
  result = concat_with_union(a, b)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: supertable
3
- Version: 2.0.3
3
+ Version: 2.0.4
4
4
  Summary: SuperTable — versioned data lake library for SQL analytics on Parquet + Redis.
5
5
  Author: Levente Kupas
6
6
  Author-email: Levente Kupas <lkupas@kladnasoft.com>