sqlmesh 0.217.1.dev1__py3-none-any.whl → 0.227.2.dev20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (190)
  1. sqlmesh/__init__.py +12 -2
  2. sqlmesh/_version.py +2 -2
  3. sqlmesh/cli/project_init.py +10 -2
  4. sqlmesh/core/_typing.py +1 -0
  5. sqlmesh/core/audit/definition.py +8 -2
  6. sqlmesh/core/config/__init__.py +1 -1
  7. sqlmesh/core/config/connection.py +20 -5
  8. sqlmesh/core/config/dbt.py +13 -0
  9. sqlmesh/core/config/janitor.py +12 -0
  10. sqlmesh/core/config/loader.py +7 -0
  11. sqlmesh/core/config/model.py +2 -0
  12. sqlmesh/core/config/root.py +3 -0
  13. sqlmesh/core/console.py +80 -2
  14. sqlmesh/core/constants.py +1 -1
  15. sqlmesh/core/context.py +112 -35
  16. sqlmesh/core/dialect.py +3 -0
  17. sqlmesh/core/engine_adapter/_typing.py +2 -0
  18. sqlmesh/core/engine_adapter/base.py +330 -23
  19. sqlmesh/core/engine_adapter/base_postgres.py +17 -1
  20. sqlmesh/core/engine_adapter/bigquery.py +146 -7
  21. sqlmesh/core/engine_adapter/clickhouse.py +17 -13
  22. sqlmesh/core/engine_adapter/databricks.py +50 -2
  23. sqlmesh/core/engine_adapter/fabric.py +110 -29
  24. sqlmesh/core/engine_adapter/mixins.py +142 -48
  25. sqlmesh/core/engine_adapter/mssql.py +15 -4
  26. sqlmesh/core/engine_adapter/mysql.py +2 -2
  27. sqlmesh/core/engine_adapter/postgres.py +9 -3
  28. sqlmesh/core/engine_adapter/redshift.py +4 -0
  29. sqlmesh/core/engine_adapter/risingwave.py +1 -0
  30. sqlmesh/core/engine_adapter/shared.py +6 -0
  31. sqlmesh/core/engine_adapter/snowflake.py +82 -11
  32. sqlmesh/core/engine_adapter/spark.py +14 -10
  33. sqlmesh/core/engine_adapter/trino.py +5 -2
  34. sqlmesh/core/janitor.py +181 -0
  35. sqlmesh/core/lineage.py +1 -0
  36. sqlmesh/core/linter/rules/builtin.py +15 -0
  37. sqlmesh/core/loader.py +17 -30
  38. sqlmesh/core/macros.py +35 -13
  39. sqlmesh/core/model/common.py +2 -0
  40. sqlmesh/core/model/definition.py +72 -4
  41. sqlmesh/core/model/kind.py +66 -2
  42. sqlmesh/core/model/meta.py +107 -2
  43. sqlmesh/core/node.py +101 -2
  44. sqlmesh/core/plan/builder.py +15 -10
  45. sqlmesh/core/plan/common.py +196 -2
  46. sqlmesh/core/plan/definition.py +21 -6
  47. sqlmesh/core/plan/evaluator.py +72 -113
  48. sqlmesh/core/plan/explainer.py +90 -8
  49. sqlmesh/core/plan/stages.py +42 -21
  50. sqlmesh/core/renderer.py +26 -18
  51. sqlmesh/core/scheduler.py +60 -19
  52. sqlmesh/core/selector.py +137 -9
  53. sqlmesh/core/signal.py +64 -1
  54. sqlmesh/core/snapshot/__init__.py +1 -0
  55. sqlmesh/core/snapshot/definition.py +109 -25
  56. sqlmesh/core/snapshot/evaluator.py +610 -50
  57. sqlmesh/core/state_sync/__init__.py +0 -1
  58. sqlmesh/core/state_sync/base.py +31 -27
  59. sqlmesh/core/state_sync/cache.py +12 -4
  60. sqlmesh/core/state_sync/common.py +216 -111
  61. sqlmesh/core/state_sync/db/facade.py +30 -15
  62. sqlmesh/core/state_sync/db/interval.py +27 -7
  63. sqlmesh/core/state_sync/db/migrator.py +14 -8
  64. sqlmesh/core/state_sync/db/snapshot.py +119 -87
  65. sqlmesh/core/table_diff.py +2 -2
  66. sqlmesh/core/test/definition.py +14 -9
  67. sqlmesh/core/test/discovery.py +4 -0
  68. sqlmesh/dbt/adapter.py +20 -11
  69. sqlmesh/dbt/basemodel.py +52 -41
  70. sqlmesh/dbt/builtin.py +27 -11
  71. sqlmesh/dbt/column.py +17 -5
  72. sqlmesh/dbt/common.py +4 -2
  73. sqlmesh/dbt/context.py +14 -1
  74. sqlmesh/dbt/loader.py +60 -8
  75. sqlmesh/dbt/manifest.py +136 -8
  76. sqlmesh/dbt/model.py +105 -25
  77. sqlmesh/dbt/package.py +16 -1
  78. sqlmesh/dbt/profile.py +3 -3
  79. sqlmesh/dbt/project.py +12 -7
  80. sqlmesh/dbt/seed.py +1 -1
  81. sqlmesh/dbt/source.py +6 -1
  82. sqlmesh/dbt/target.py +25 -6
  83. sqlmesh/dbt/test.py +31 -1
  84. sqlmesh/integrations/github/cicd/controller.py +6 -2
  85. sqlmesh/lsp/context.py +4 -2
  86. sqlmesh/magics.py +1 -1
  87. sqlmesh/migrations/v0000_baseline.py +3 -6
  88. sqlmesh/migrations/v0061_mysql_fix_blob_text_type.py +2 -5
  89. sqlmesh/migrations/v0062_add_model_gateway.py +2 -2
  90. sqlmesh/migrations/v0063_change_signals.py +2 -4
  91. sqlmesh/migrations/v0064_join_when_matched_strings.py +2 -4
  92. sqlmesh/migrations/v0065_add_model_optimize.py +2 -2
  93. sqlmesh/migrations/v0066_add_auto_restatements.py +2 -6
  94. sqlmesh/migrations/v0067_add_tsql_date_full_precision.py +2 -2
  95. sqlmesh/migrations/v0068_include_unrendered_query_in_metadata_hash.py +2 -2
  96. sqlmesh/migrations/v0069_update_dev_table_suffix.py +2 -4
  97. sqlmesh/migrations/v0070_include_grains_in_metadata_hash.py +2 -2
  98. sqlmesh/migrations/v0071_add_dev_version_to_intervals.py +2 -6
  99. sqlmesh/migrations/v0072_add_environment_statements.py +2 -4
  100. sqlmesh/migrations/v0073_remove_symbolic_disable_restatement.py +2 -4
  101. sqlmesh/migrations/v0074_add_partition_by_time_column_property.py +2 -2
  102. sqlmesh/migrations/v0075_remove_validate_query.py +2 -4
  103. sqlmesh/migrations/v0076_add_cron_tz.py +2 -2
  104. sqlmesh/migrations/v0077_fix_column_type_hash_calculation.py +2 -2
  105. sqlmesh/migrations/v0078_warn_if_non_migratable_python_env.py +2 -4
  106. sqlmesh/migrations/v0079_add_gateway_managed_property.py +7 -9
  107. sqlmesh/migrations/v0080_add_batch_size_to_scd_type_2_models.py +2 -2
  108. sqlmesh/migrations/v0081_update_partitioned_by.py +2 -4
  109. sqlmesh/migrations/v0082_warn_if_incorrectly_duplicated_statements.py +2 -4
  110. sqlmesh/migrations/v0083_use_sql_for_scd_time_data_type_data_hash.py +2 -2
  111. sqlmesh/migrations/v0084_normalize_quote_when_matched_and_merge_filter.py +2 -2
  112. sqlmesh/migrations/v0085_deterministic_repr.py +2 -4
  113. sqlmesh/migrations/v0086_check_deterministic_bug.py +2 -4
  114. sqlmesh/migrations/v0087_normalize_blueprint_variables.py +2 -4
  115. sqlmesh/migrations/v0088_warn_about_variable_python_env_diffs.py +2 -4
  116. sqlmesh/migrations/v0089_add_virtual_environment_mode.py +2 -2
  117. sqlmesh/migrations/v0090_add_forward_only_column.py +2 -6
  118. sqlmesh/migrations/v0091_on_additive_change.py +2 -2
  119. sqlmesh/migrations/v0092_warn_about_dbt_data_type_diff.py +2 -4
  120. sqlmesh/migrations/v0093_use_raw_sql_in_fingerprint.py +2 -2
  121. sqlmesh/migrations/v0094_add_dev_version_and_fingerprint_columns.py +2 -6
  122. sqlmesh/migrations/v0095_warn_about_dbt_raw_sql_diff.py +2 -4
  123. sqlmesh/migrations/v0096_remove_plan_dags_table.py +2 -4
  124. sqlmesh/migrations/v0097_add_dbt_name_in_node.py +2 -2
  125. sqlmesh/migrations/v0098_add_dbt_node_info_in_node.py +103 -0
  126. sqlmesh/migrations/v0099_add_last_altered_to_intervals.py +25 -0
  127. sqlmesh/migrations/v0100_add_grants_and_grants_target_layer.py +9 -0
  128. sqlmesh/utils/__init__.py +8 -1
  129. sqlmesh/utils/cache.py +5 -1
  130. sqlmesh/utils/date.py +1 -1
  131. sqlmesh/utils/errors.py +4 -0
  132. sqlmesh/utils/git.py +3 -1
  133. sqlmesh/utils/jinja.py +25 -2
  134. sqlmesh/utils/pydantic.py +6 -6
  135. sqlmesh/utils/windows.py +13 -3
  136. {sqlmesh-0.217.1.dev1.dist-info → sqlmesh-0.227.2.dev20.dist-info}/METADATA +5 -5
  137. {sqlmesh-0.217.1.dev1.dist-info → sqlmesh-0.227.2.dev20.dist-info}/RECORD +188 -183
  138. sqlmesh_dbt/cli.py +70 -7
  139. sqlmesh_dbt/console.py +14 -6
  140. sqlmesh_dbt/operations.py +103 -24
  141. sqlmesh_dbt/selectors.py +39 -1
  142. web/client/dist/assets/{Audits-Ucsx1GzF.js → Audits-CBiYyyx-.js} +1 -1
  143. web/client/dist/assets/{Banner-BWDzvavM.js → Banner-DSRbUlO5.js} +1 -1
  144. web/client/dist/assets/{ChevronDownIcon-D2VL13Ah.js → ChevronDownIcon-MK_nrjD_.js} +1 -1
  145. web/client/dist/assets/{ChevronRightIcon-DWGYbf1l.js → ChevronRightIcon-CLWtT22Q.js} +1 -1
  146. web/client/dist/assets/{Content-DdHDZM3I.js → Content-BNuGZN5l.js} +1 -1
  147. web/client/dist/assets/{Content-Bikfy8fh.js → Content-CSHJyW0n.js} +1 -1
  148. web/client/dist/assets/{Data-CzAJH7rW.js → Data-C1oRDbLx.js} +1 -1
  149. web/client/dist/assets/{DataCatalog-BJF11g8f.js → DataCatalog-HXyX2-_j.js} +1 -1
  150. web/client/dist/assets/{Editor-s0SBpV2y.js → Editor-BDyfpUuw.js} +1 -1
  151. web/client/dist/assets/{Editor-DgLhgKnm.js → Editor-D0jNItwC.js} +1 -1
  152. web/client/dist/assets/{Errors-D0m0O1d3.js → Errors-BfuFLcPi.js} +1 -1
  153. web/client/dist/assets/{FileExplorer-CEv0vXkt.js → FileExplorer-BR9IE3he.js} +1 -1
  154. web/client/dist/assets/{Footer-BwzXn8Ew.js → Footer-CgBEtiAh.js} +1 -1
  155. web/client/dist/assets/{Header-6heDkEqG.js → Header-DSqR6nSO.js} +1 -1
  156. web/client/dist/assets/{Input-obuJsD6k.js → Input-B-oZ6fGO.js} +1 -1
  157. web/client/dist/assets/Lineage-DYQVwDbD.js +1 -0
  158. web/client/dist/assets/{ListboxShow-HM9_qyrt.js → ListboxShow-BE5-xevs.js} +1 -1
  159. web/client/dist/assets/{ModelLineage-zWdKo0U2.js → ModelLineage-DkIFAYo4.js} +1 -1
  160. web/client/dist/assets/{Models-Bcu66SRz.js → Models-D5dWr8RB.js} +1 -1
  161. web/client/dist/assets/{Page-BWEEQfIt.js → Page-C-XfU5BR.js} +1 -1
  162. web/client/dist/assets/{Plan-C4gXCqlf.js → Plan-ZEuTINBq.js} +1 -1
  163. web/client/dist/assets/{PlusCircleIcon-CVDO651q.js → PlusCircleIcon-DVXAHG8_.js} +1 -1
  164. web/client/dist/assets/{ReportErrors-BT6xFwAr.js → ReportErrors-B7FEPzMB.js} +1 -1
  165. web/client/dist/assets/{Root-ryJoBK4h.js → Root-8aZyhPxF.js} +1 -1
  166. web/client/dist/assets/{SearchList-DB04sPb9.js → SearchList-W_iT2G82.js} +1 -1
  167. web/client/dist/assets/{SelectEnvironment-CUYcXUu6.js → SelectEnvironment-C65jALmO.js} +1 -1
  168. web/client/dist/assets/{SourceList-Doo_9ZGp.js → SourceList-DSLO6nVJ.js} +1 -1
  169. web/client/dist/assets/{SourceListItem-D5Mj7Dly.js → SourceListItem-BHt8d9-I.js} +1 -1
  170. web/client/dist/assets/{SplitPane-qHmkD1qy.js → SplitPane-CViaZmw6.js} +1 -1
  171. web/client/dist/assets/{Tests-DH1Z74ML.js → Tests-DhaVt5t1.js} +1 -1
  172. web/client/dist/assets/{Welcome-DqUJUNMF.js → Welcome-DvpjH-_4.js} +1 -1
  173. web/client/dist/assets/context-BctCsyGb.js +71 -0
  174. web/client/dist/assets/{context-Dr54UHLi.js → context-DFNeGsFF.js} +1 -1
  175. web/client/dist/assets/{editor-DYIP1yQ4.js → editor-CcO28cqd.js} +1 -1
  176. web/client/dist/assets/{file-DarlIDVi.js → file-CvJN3aZO.js} +1 -1
  177. web/client/dist/assets/{floating-ui.react-dom-BH3TFvkM.js → floating-ui.react-dom-CjE-JNW1.js} +1 -1
  178. web/client/dist/assets/{help-Bl8wqaQc.js → help-DuPhjipa.js} +1 -1
  179. web/client/dist/assets/{index-D1sR7wpN.js → index-C-dJH7yZ.js} +1 -1
  180. web/client/dist/assets/{index-O3mjYpnE.js → index-Dj0i1-CA.js} +2 -2
  181. web/client/dist/assets/{plan-CehRrJUG.js → plan-BTRSbjKn.js} +1 -1
  182. web/client/dist/assets/{popover-CqgMRE0G.js → popover-_Sf0yvOI.js} +1 -1
  183. web/client/dist/assets/{project-6gxepOhm.js → project-BvSOI8MY.js} +1 -1
  184. web/client/dist/index.html +1 -1
  185. web/client/dist/assets/Lineage-D0Hgdz2v.js +0 -1
  186. web/client/dist/assets/context-DgX0fp2E.js +0 -68
  187. {sqlmesh-0.217.1.dev1.dist-info → sqlmesh-0.227.2.dev20.dist-info}/WHEEL +0 -0
  188. {sqlmesh-0.217.1.dev1.dist-info → sqlmesh-0.227.2.dev20.dist-info}/entry_points.txt +0 -0
  189. {sqlmesh-0.217.1.dev1.dist-info → sqlmesh-0.227.2.dev20.dist-info}/licenses/LICENSE +0 -0
  190. {sqlmesh-0.217.1.dev1.dist-info → sqlmesh-0.227.2.dev20.dist-info}/top_level.txt +0 -0
@@ -20,5 +20,4 @@ from sqlmesh.core.state_sync.base import (
20
20
  Versions as Versions,
21
21
  )
22
22
  from sqlmesh.core.state_sync.cache import CachingStateSync as CachingStateSync
23
- from sqlmesh.core.state_sync.common import cleanup_expired_views as cleanup_expired_views
24
23
  from sqlmesh.core.state_sync.db import EngineAdapterStateSync as EngineAdapterStateSync
@@ -11,7 +11,6 @@ from sqlglot import __version__ as SQLGLOT_VERSION
11
11
  from sqlmesh import migrations
12
12
  from sqlmesh.core.environment import (
13
13
  Environment,
14
- EnvironmentNamingInfo,
15
14
  EnvironmentStatements,
16
15
  EnvironmentSummary,
17
16
  )
@@ -19,9 +18,8 @@ from sqlmesh.core.snapshot import (
19
18
  Snapshot,
20
19
  SnapshotId,
21
20
  SnapshotIdLike,
21
+ SnapshotIdAndVersionLike,
22
22
  SnapshotInfoLike,
23
- SnapshotTableCleanupTask,
24
- SnapshotTableInfo,
25
23
  SnapshotNameVersion,
26
24
  SnapshotIdAndVersion,
27
25
  )
@@ -29,8 +27,13 @@ from sqlmesh.core.snapshot.definition import Interval, SnapshotIntervals
29
27
  from sqlmesh.utils import major_minor
30
28
  from sqlmesh.utils.date import TimeLike
31
29
  from sqlmesh.utils.errors import SQLMeshError
32
- from sqlmesh.utils.pydantic import PydanticModel, ValidationInfo, field_validator
33
- from sqlmesh.core.state_sync.common import StateStream
30
+ from sqlmesh.utils.pydantic import PydanticModel, field_validator
31
+ from sqlmesh.core.state_sync.common import (
32
+ StateStream,
33
+ ExpiredSnapshotBatch,
34
+ PromotionResult,
35
+ ExpiredBatchRange,
36
+ )
34
37
 
35
38
  logger = logging.getLogger(__name__)
36
39
 
@@ -71,20 +74,6 @@ MIGRATIONS = [
71
74
  SCHEMA_VERSION: int = MIN_SCHEMA_VERSION + len(MIGRATIONS) - 1
72
75
 
73
76
 
74
- class PromotionResult(PydanticModel):
75
- added: t.List[SnapshotTableInfo]
76
- removed: t.List[SnapshotTableInfo]
77
- removed_environment_naming_info: t.Optional[EnvironmentNamingInfo]
78
-
79
- @field_validator("removed_environment_naming_info")
80
- def _validate_removed_environment_naming_info(
81
- cls, v: t.Optional[EnvironmentNamingInfo], info: ValidationInfo
82
- ) -> t.Optional[EnvironmentNamingInfo]:
83
- if v and not info.data.get("removed"):
84
- raise ValueError("removed_environment_naming_info must be None if removed is empty")
85
- return v
86
-
87
-
88
77
  class StateReader(abc.ABC):
89
78
  """Abstract base class for read-only operations on snapshot and environment state."""
90
79
 
@@ -314,15 +303,21 @@ class StateReader(abc.ABC):
314
303
 
315
304
  @abc.abstractmethod
316
305
  def get_expired_snapshots(
317
- self, current_ts: t.Optional[int] = None, ignore_ttl: bool = False
318
- ) -> t.List[SnapshotTableCleanupTask]:
319
- """Aggregates the id's of the expired snapshots and creates a list of table cleanup tasks.
306
+ self,
307
+ *,
308
+ batch_range: ExpiredBatchRange,
309
+ current_ts: t.Optional[int] = None,
310
+ ignore_ttl: bool = False,
311
+ ) -> t.Optional[ExpiredSnapshotBatch]:
312
+ """Returns a single batch of expired snapshots ordered by (updated_ts, name, identifier).
320
313
 
321
- Expired snapshots are snapshots that have exceeded their time-to-live
322
- and are no longer in use within an environment.
314
+ Args:
315
+ current_ts: Timestamp used to evaluate expiration.
316
+ ignore_ttl: If True, include snapshots regardless of TTL (only checks if unreferenced).
317
+ batch_range: The range of the batch to fetch.
323
318
 
324
319
  Returns:
325
- The list of table cleanup tasks.
320
+ A batch describing expired snapshots or None if no snapshots are pending cleanup.
326
321
  """
327
322
 
328
323
  @abc.abstractmethod
@@ -362,7 +357,10 @@ class StateSync(StateReader, abc.ABC):
362
357
 
363
358
  @abc.abstractmethod
364
359
  def delete_expired_snapshots(
365
- self, ignore_ttl: bool = False, current_ts: t.Optional[int] = None
360
+ self,
361
+ batch_range: ExpiredBatchRange,
362
+ ignore_ttl: bool = False,
363
+ current_ts: t.Optional[int] = None,
366
364
  ) -> None:
367
365
  """Removes expired snapshots.
368
366
 
@@ -370,8 +368,10 @@ class StateSync(StateReader, abc.ABC):
370
368
  and are no longer in use within an environment.
371
369
 
372
370
  Args:
371
+ batch_range: The range of snapshots to delete in this batch.
373
372
  ignore_ttl: Ignore the TTL on the snapshot when considering it expired. This has the effect of deleting
374
373
  all snapshots that are not referenced in any environment
374
+ current_ts: Timestamp used to evaluate expiration.
375
375
  """
376
376
 
377
377
  @abc.abstractmethod
@@ -390,7 +390,7 @@ class StateSync(StateReader, abc.ABC):
390
390
  @abc.abstractmethod
391
391
  def remove_intervals(
392
392
  self,
393
- snapshot_intervals: t.Sequence[t.Tuple[SnapshotInfoLike, Interval]],
393
+ snapshot_intervals: t.Sequence[t.Tuple[SnapshotIdAndVersionLike, Interval]],
394
394
  remove_shared_versions: bool = False,
395
395
  ) -> None:
396
396
  """Remove an interval from a list of snapshots and sync it to the store.
@@ -495,6 +495,7 @@ class StateSync(StateReader, abc.ABC):
495
495
  start: TimeLike,
496
496
  end: TimeLike,
497
497
  is_dev: bool = False,
498
+ last_altered_ts: t.Optional[int] = None,
498
499
  ) -> None:
499
500
  """Add an interval to a snapshot and sync it to the store.
500
501
 
@@ -503,6 +504,7 @@ class StateSync(StateReader, abc.ABC):
503
504
  start: The start of the interval to add.
504
505
  end: The end of the interval to add.
505
506
  is_dev: Indicates whether the given interval is being added while in development mode
507
+ last_altered_ts: The timestamp of the last modification of the physical table
506
508
  """
507
509
  start_ts, end_ts = snapshot.inclusive_exclusive(start, end, strict=False, expand=False)
508
510
  if not snapshot.version:
@@ -515,6 +517,8 @@ class StateSync(StateReader, abc.ABC):
515
517
  dev_version=snapshot.dev_version,
516
518
  intervals=intervals if not is_dev else [],
517
519
  dev_intervals=intervals if is_dev else [],
520
+ last_altered_ts=last_altered_ts if not is_dev else None,
521
+ dev_last_altered_ts=last_altered_ts if is_dev else None,
518
522
  )
519
523
  self.add_snapshots_intervals([snapshot_intervals])
520
524
 
@@ -7,10 +7,12 @@ from sqlmesh.core.snapshot import (
7
7
  Snapshot,
8
8
  SnapshotId,
9
9
  SnapshotIdLike,
10
+ SnapshotIdAndVersionLike,
10
11
  SnapshotInfoLike,
11
12
  )
12
13
  from sqlmesh.core.snapshot.definition import Interval, SnapshotIntervals
13
14
  from sqlmesh.core.state_sync.base import DelegatingStateSync, StateSync
15
+ from sqlmesh.core.state_sync.common import ExpiredBatchRange
14
16
  from sqlmesh.utils.date import TimeLike, now_timestamp
15
17
 
16
18
 
@@ -107,11 +109,17 @@ class CachingStateSync(DelegatingStateSync):
107
109
  self.state_sync.delete_snapshots(snapshot_ids)
108
110
 
109
111
  def delete_expired_snapshots(
110
- self, ignore_ttl: bool = False, current_ts: t.Optional[int] = None
112
+ self,
113
+ batch_range: ExpiredBatchRange,
114
+ ignore_ttl: bool = False,
115
+ current_ts: t.Optional[int] = None,
111
116
  ) -> None:
112
- current_ts = current_ts or now_timestamp()
113
117
  self.snapshot_cache.clear()
114
- self.state_sync.delete_expired_snapshots(current_ts=current_ts, ignore_ttl=ignore_ttl)
118
+ self.state_sync.delete_expired_snapshots(
119
+ batch_range=batch_range,
120
+ ignore_ttl=ignore_ttl,
121
+ current_ts=current_ts,
122
+ )
115
123
 
116
124
  def add_snapshots_intervals(self, snapshots_intervals: t.Sequence[SnapshotIntervals]) -> None:
117
125
  for snapshot_intervals in snapshots_intervals:
@@ -128,7 +136,7 @@ class CachingStateSync(DelegatingStateSync):
128
136
 
129
137
  def remove_intervals(
130
138
  self,
131
- snapshot_intervals: t.Sequence[t.Tuple[SnapshotInfoLike, Interval]],
139
+ snapshot_intervals: t.Sequence[t.Tuple[SnapshotIdAndVersionLike, Interval]],
132
140
  remove_shared_versions: bool = False,
133
141
  ) -> None:
134
142
  for s, _ in snapshot_intervals:
@@ -7,124 +7,25 @@ import itertools
7
7
  import abc
8
8
 
9
9
  from dataclasses import dataclass
10
+
11
+ from pydantic_core.core_schema import ValidationInfo
10
12
  from sqlglot import exp
11
13
 
12
- from sqlmesh.core.console import Console
13
- from sqlmesh.core.dialect import schema_
14
- from sqlmesh.utils.pydantic import PydanticModel
15
- from sqlmesh.core.environment import Environment, EnvironmentStatements
16
- from sqlmesh.utils.errors import SQLMeshError
17
- from sqlmesh.core.snapshot import Snapshot
14
+ from sqlmesh.utils.pydantic import PydanticModel, field_validator
15
+ from sqlmesh.core.environment import Environment, EnvironmentStatements, EnvironmentNamingInfo
16
+ from sqlmesh.core.snapshot import (
17
+ Snapshot,
18
+ SnapshotId,
19
+ SnapshotTableCleanupTask,
20
+ SnapshotTableInfo,
21
+ )
18
22
 
19
23
  if t.TYPE_CHECKING:
20
- from sqlmesh.core.engine_adapter.base import EngineAdapter
21
- from sqlmesh.core.state_sync.base import Versions
24
+ from sqlmesh.core.state_sync.base import Versions, StateReader
22
25
 
23
26
  logger = logging.getLogger(__name__)
24
27
 
25
-
26
- def cleanup_expired_views(
27
- default_adapter: EngineAdapter,
28
- engine_adapters: t.Dict[str, EngineAdapter],
29
- environments: t.List[Environment],
30
- warn_on_delete_failure: bool = False,
31
- console: t.Optional[Console] = None,
32
- ) -> None:
33
- expired_schema_or_catalog_environments = [
34
- environment
35
- for environment in environments
36
- if environment.suffix_target.is_schema or environment.suffix_target.is_catalog
37
- ]
38
- expired_table_environments = [
39
- environment for environment in environments if environment.suffix_target.is_table
40
- ]
41
-
42
- # We have to use the corresponding adapter if the virtual layer is gateway managed
43
- def get_adapter(gateway_managed: bool, gateway: t.Optional[str] = None) -> EngineAdapter:
44
- if gateway_managed and gateway:
45
- return engine_adapters.get(gateway, default_adapter)
46
- return default_adapter
47
-
48
- catalogs_to_drop: t.Set[t.Tuple[EngineAdapter, str]] = set()
49
- schemas_to_drop: t.Set[t.Tuple[EngineAdapter, exp.Table]] = set()
50
-
51
- # Collect schemas and catalogs to drop
52
- for engine_adapter, expired_catalog, expired_schema, suffix_target in {
53
- (
54
- (engine_adapter := get_adapter(environment.gateway_managed, snapshot.model_gateway)),
55
- snapshot.qualified_view_name.catalog_for_environment(
56
- environment.naming_info, dialect=engine_adapter.dialect
57
- ),
58
- snapshot.qualified_view_name.schema_for_environment(
59
- environment.naming_info, dialect=engine_adapter.dialect
60
- ),
61
- environment.suffix_target,
62
- )
63
- for environment in expired_schema_or_catalog_environments
64
- for snapshot in environment.snapshots
65
- if snapshot.is_model and not snapshot.is_symbolic
66
- }:
67
- if suffix_target.is_catalog:
68
- if expired_catalog:
69
- catalogs_to_drop.add((engine_adapter, expired_catalog))
70
- else:
71
- schema = schema_(expired_schema, expired_catalog)
72
- schemas_to_drop.add((engine_adapter, schema))
73
-
74
- # Drop the views for the expired environments
75
- for engine_adapter, expired_view in {
76
- (
77
- (engine_adapter := get_adapter(environment.gateway_managed, snapshot.model_gateway)),
78
- snapshot.qualified_view_name.for_environment(
79
- environment.naming_info, dialect=engine_adapter.dialect
80
- ),
81
- )
82
- for environment in expired_table_environments
83
- for snapshot in environment.snapshots
84
- if snapshot.is_model and not snapshot.is_symbolic
85
- }:
86
- try:
87
- engine_adapter.drop_view(expired_view, ignore_if_not_exists=True)
88
- if console:
89
- console.update_cleanup_progress(expired_view)
90
- except Exception as e:
91
- message = f"Failed to drop the expired environment view '{expired_view}': {e}"
92
- if warn_on_delete_failure:
93
- logger.warning(message)
94
- else:
95
- raise SQLMeshError(message) from e
96
-
97
- # Drop the schemas for the expired environments
98
- for engine_adapter, schema in schemas_to_drop:
99
- try:
100
- engine_adapter.drop_schema(
101
- schema,
102
- ignore_if_not_exists=True,
103
- cascade=True,
104
- )
105
- if console:
106
- console.update_cleanup_progress(schema.sql(dialect=engine_adapter.dialect))
107
- except Exception as e:
108
- message = f"Failed to drop the expired environment schema '{schema}': {e}"
109
- if warn_on_delete_failure:
110
- logger.warning(message)
111
- else:
112
- raise SQLMeshError(message) from e
113
-
114
- # Drop any catalogs that were associated with a snapshot where the engine adapter supports dropping catalogs
115
- # catalogs_to_drop is only populated when environment_suffix_target is set to 'catalog'
116
- for engine_adapter, catalog in catalogs_to_drop:
117
- if engine_adapter.SUPPORTS_CREATE_DROP_CATALOG:
118
- try:
119
- engine_adapter.drop_catalog(catalog)
120
- if console:
121
- console.update_cleanup_progress(catalog)
122
- except Exception as e:
123
- message = f"Failed to drop the expired environment catalog '{catalog}': {e}"
124
- if warn_on_delete_failure:
125
- logger.warning(message)
126
- else:
127
- raise SQLMeshError(message) from e
28
+ EXPIRED_SNAPSHOT_DEFAULT_BATCH_SIZE = 200
128
29
 
129
30
 
130
31
  def transactional() -> t.Callable[[t.Callable], t.Callable]:
@@ -215,3 +116,207 @@ class StateStream(abc.ABC):
215
116
  yield EnvironmentsChunk(environments)
216
117
 
217
118
  return _StateStream()
119
+
120
+
121
+ class ExpiredBatchRange(PydanticModel):
122
+ start: RowBoundary
123
+ end: t.Union[RowBoundary, LimitBoundary]
124
+
125
+ @classmethod
126
+ def init_batch_range(cls, batch_size: int) -> ExpiredBatchRange:
127
+ return ExpiredBatchRange(
128
+ start=RowBoundary.lowest_boundary(),
129
+ end=LimitBoundary(batch_size=batch_size),
130
+ )
131
+
132
+ @classmethod
133
+ def all_batch_range(cls) -> ExpiredBatchRange:
134
+ return ExpiredBatchRange(
135
+ start=RowBoundary.lowest_boundary(),
136
+ end=RowBoundary.highest_boundary(),
137
+ )
138
+
139
+ @classmethod
140
+ def _expanded_tuple_comparison(
141
+ cls,
142
+ columns: t.List[exp.Column],
143
+ values: t.List[exp.Literal],
144
+ operator: t.Type[exp.Expression],
145
+ ) -> exp.Expression:
146
+ """Generate expanded tuple comparison that works across all SQL engines.
147
+
148
+ Converts tuple comparisons like (a, b, c) OP (x, y, z) into an expanded form
149
+ that's compatible with all SQL engines, since native tuple comparisons have
150
+ inconsistent support across engines (especially DuckDB, MySQL, SQLite).
151
+
152
+ Repro of problem with DuckDB:
153
+ "SELECT * FROM VALUES(1,'2') as test(a,b) WHERE ((a, b) > (1, 'foo')) AND ((a, b) <= (10, 'baz'))"
154
+
155
+ Args:
156
+ columns: List of column expressions to compare
157
+ values: List of value expressions to compare against
158
+ operator: The comparison operator class (exp.GT, exp.GTE, exp.LT, exp.LTE)
159
+
160
+ Examples:
161
+ (a, b, c) > (x, y, z) expands to:
162
+ a > x OR (a = x AND b > y) OR (a = x AND b = y AND c > z)
163
+
164
+ (a, b, c) <= (x, y, z) expands to:
165
+ a < x OR (a = x AND b < y) OR (a = x AND b = y AND c <= z)
166
+
167
+ (a, b, c) >= (x, y, z) expands to:
168
+ a > x OR (a = x AND b > y) OR (a = x AND b = y AND c >= z)
169
+
170
+ Returns:
171
+ An expanded OR expression representing the tuple comparison
172
+ """
173
+ if operator not in (exp.GT, exp.GTE, exp.LT, exp.LTE):
174
+ raise ValueError(f"Unsupported operator: {operator}. Use GT, GTE, LT, or LTE.")
175
+
176
+ # For <= and >=, we use the strict operator for all but the last column
177
+ # e.g., (a, b) <= (x, y) becomes: a < x OR (a = x AND b <= y)
178
+ # For < and >, we use the strict operator throughout
179
+ # e.g., (a, b) > (x, y) becomes: a > x OR (a = x AND b > y)
180
+ strict_operator: t.Type[exp.Expression]
181
+ final_operator: t.Type[exp.Expression]
182
+
183
+ if operator in (exp.LTE, exp.GTE):
184
+ # For inclusive operators (<=, >=), use strict form for intermediate columns
185
+ # but keep inclusive form for the last column
186
+ strict_operator = exp.LT if operator == exp.LTE else exp.GT
187
+ final_operator = operator # Keep LTE/GTE for last column
188
+ else:
189
+ # For strict operators (<, >), use them throughout
190
+ strict_operator = operator
191
+ final_operator = operator
192
+
193
+ conditions: t.List[exp.Expression] = []
194
+ for i in range(len(columns)):
195
+ # Build equality conditions for all columns before current
196
+ equality_conditions = [exp.EQ(this=columns[j], expression=values[j]) for j in range(i)]
197
+
198
+ # Use the final operator for the last column, strict for others
199
+ comparison_op = final_operator if i == len(columns) - 1 else strict_operator
200
+ comparison_condition = comparison_op(this=columns[i], expression=values[i])
201
+
202
+ if equality_conditions:
203
+ conditions.append(exp.and_(*equality_conditions, comparison_condition))
204
+ else:
205
+ conditions.append(comparison_condition)
206
+
207
+ return exp.or_(*conditions) if len(conditions) > 1 else conditions[0]
208
+
209
+ @property
210
+ def where_filter(self) -> exp.Expression:
211
+ # Use expanded tuple comparisons for cross-engine compatibility
212
+ # Native tuple comparisons like (a, b) > (x, y) don't work reliably across all SQL engines
213
+ columns = [
214
+ exp.column("updated_ts"),
215
+ exp.column("name"),
216
+ exp.column("identifier"),
217
+ ]
218
+ start_values = [
219
+ exp.Literal.number(self.start.updated_ts),
220
+ exp.Literal.string(self.start.name),
221
+ exp.Literal.string(self.start.identifier),
222
+ ]
223
+
224
+ start_condition = self._expanded_tuple_comparison(columns, start_values, exp.GT)
225
+
226
+ range_filter: exp.Expression
227
+ if isinstance(self.end, RowBoundary):
228
+ end_values = [
229
+ exp.Literal.number(self.end.updated_ts),
230
+ exp.Literal.string(self.end.name),
231
+ exp.Literal.string(self.end.identifier),
232
+ ]
233
+ end_condition = self._expanded_tuple_comparison(columns, end_values, exp.LTE)
234
+ range_filter = exp.and_(start_condition, end_condition)
235
+ else:
236
+ range_filter = start_condition
237
+ return range_filter
238
+
239
+
240
+ class RowBoundary(PydanticModel):
241
+ updated_ts: int
242
+ name: str
243
+ identifier: str
244
+
245
+ @classmethod
246
+ def lowest_boundary(cls) -> RowBoundary:
247
+ return RowBoundary(updated_ts=0, name="", identifier="")
248
+
249
+ @classmethod
250
+ def highest_boundary(cls) -> RowBoundary:
251
+ # 9999-12-31T23:59:59.999Z in epoch milliseconds
252
+ return RowBoundary(updated_ts=253_402_300_799_999, name="", identifier="")
253
+
254
+
255
+ class LimitBoundary(PydanticModel):
256
+ batch_size: int
257
+
258
+ @classmethod
259
+ def init_batch_boundary(cls, batch_size: int) -> LimitBoundary:
260
+ return LimitBoundary(batch_size=batch_size)
261
+
262
+
263
+ class PromotionResult(PydanticModel):
264
+ added: t.List[SnapshotTableInfo]
265
+ removed: t.List[SnapshotTableInfo]
266
+ removed_environment_naming_info: t.Optional[EnvironmentNamingInfo]
267
+
268
+ @field_validator("removed_environment_naming_info")
269
+ def _validate_removed_environment_naming_info(
270
+ cls, v: t.Optional[EnvironmentNamingInfo], info: ValidationInfo
271
+ ) -> t.Optional[EnvironmentNamingInfo]:
272
+ if v and not info.data.get("removed"):
273
+ raise ValueError("removed_environment_naming_info must be None if removed is empty")
274
+ return v
275
+
276
+
277
+ class ExpiredSnapshotBatch(PydanticModel):
278
+ """A batch of expired snapshots to be cleaned up."""
279
+
280
+ expired_snapshot_ids: t.Set[SnapshotId]
281
+ cleanup_tasks: t.List[SnapshotTableCleanupTask]
282
+ batch_range: ExpiredBatchRange
283
+
284
+
285
+ def iter_expired_snapshot_batches(
286
+ state_reader: StateReader,
287
+ *,
288
+ current_ts: int,
289
+ ignore_ttl: bool = False,
290
+ batch_size: t.Optional[int] = None,
291
+ ) -> t.Iterator[ExpiredSnapshotBatch]:
292
+ """Yields expired snapshot batches.
293
+
294
+ Args:
295
+ state_reader: StateReader instance to query expired snapshots from.
296
+ current_ts: Timestamp used to evaluate expiration.
297
+ ignore_ttl: If True, include snapshots regardless of TTL (only checks if unreferenced).
298
+ batch_size: Maximum number of snapshots to fetch per batch.
299
+ """
300
+
301
+ batch_size = batch_size if batch_size is not None else EXPIRED_SNAPSHOT_DEFAULT_BATCH_SIZE
302
+ batch_range = ExpiredBatchRange.init_batch_range(batch_size=batch_size)
303
+
304
+ while True:
305
+ batch = state_reader.get_expired_snapshots(
306
+ current_ts=current_ts,
307
+ ignore_ttl=ignore_ttl,
308
+ batch_range=batch_range,
309
+ )
310
+
311
+ if batch is None:
312
+ return
313
+
314
+ yield batch
315
+
316
+ assert isinstance(batch.batch_range.end, RowBoundary), (
317
+ "Only RowBoundary is supported for pagination currently"
318
+ )
319
+ batch_range = ExpiredBatchRange(
320
+ start=batch.batch_range.end,
321
+ end=LimitBoundary(batch_size=batch_size),
322
+ )
@@ -31,10 +31,10 @@ from sqlmesh.core.snapshot import (
31
31
  SnapshotIdAndVersion,
32
32
  SnapshotId,
33
33
  SnapshotIdLike,
34
+ SnapshotIdAndVersionLike,
34
35
  SnapshotInfoLike,
35
36
  SnapshotIntervals,
36
37
  SnapshotNameVersion,
37
- SnapshotTableCleanupTask,
38
38
  SnapshotTableInfo,
39
39
  start_date,
40
40
  )
@@ -42,7 +42,6 @@ from sqlmesh.core.snapshot.definition import (
42
42
  Interval,
43
43
  )
44
44
  from sqlmesh.core.state_sync.base import (
45
- PromotionResult,
46
45
  StateSync,
47
46
  Versions,
48
47
  )
@@ -54,6 +53,9 @@ from sqlmesh.core.state_sync.common import (
54
53
  StateStream,
55
54
  chunk_iterable,
56
55
  EnvironmentWithStatements,
56
+ ExpiredSnapshotBatch,
57
+ PromotionResult,
58
+ ExpiredBatchRange,
57
59
  )
58
60
  from sqlmesh.core.state_sync.db.interval import IntervalState
59
61
  from sqlmesh.core.state_sync.db.environment import EnvironmentState
@@ -260,11 +262,18 @@ class EngineAdapterStateSync(StateSync):
260
262
  self.environment_state.invalidate_environment(name, protect_prod)
261
263
 
262
264
  def get_expired_snapshots(
263
- self, current_ts: t.Optional[int] = None, ignore_ttl: bool = False
264
- ) -> t.List[SnapshotTableCleanupTask]:
265
+ self,
266
+ *,
267
+ batch_range: ExpiredBatchRange,
268
+ current_ts: t.Optional[int] = None,
269
+ ignore_ttl: bool = False,
270
+ ) -> t.Optional[ExpiredSnapshotBatch]:
265
271
  current_ts = current_ts or now_timestamp()
266
272
  return self.snapshot_state.get_expired_snapshots(
267
- self.environment_state.get_environments(), current_ts=current_ts, ignore_ttl=ignore_ttl
273
+ environments=self.environment_state.get_environments(),
274
+ current_ts=current_ts,
275
+ ignore_ttl=ignore_ttl,
276
+ batch_range=batch_range,
268
277
  )
269
278
 
270
279
  def get_expired_environments(self, current_ts: int) -> t.List[EnvironmentSummary]:
@@ -272,14 +281,19 @@ class EngineAdapterStateSync(StateSync):
272
281
 
273
282
  @transactional()
274
283
  def delete_expired_snapshots(
275
- self, ignore_ttl: bool = False, current_ts: t.Optional[int] = None
284
+ self,
285
+ batch_range: ExpiredBatchRange,
286
+ ignore_ttl: bool = False,
287
+ current_ts: t.Optional[int] = None,
276
288
  ) -> None:
277
- current_ts = current_ts or now_timestamp()
278
- for expired_snapshot_ids, cleanup_targets in self.snapshot_state._get_expired_snapshots(
279
- self.environment_state.get_environments(), ignore_ttl=ignore_ttl, current_ts=current_ts
280
- ):
281
- self.snapshot_state.delete_snapshots(expired_snapshot_ids)
282
- self.interval_state.cleanup_intervals(cleanup_targets, expired_snapshot_ids)
289
+ batch = self.get_expired_snapshots(
290
+ ignore_ttl=ignore_ttl,
291
+ current_ts=current_ts,
292
+ batch_range=batch_range,
293
+ )
294
+ if batch and batch.expired_snapshot_ids:
295
+ self.snapshot_state.delete_snapshots(batch.expired_snapshot_ids)
296
+ self.interval_state.cleanup_intervals(batch.cleanup_tasks, batch.expired_snapshot_ids)
283
297
 
284
298
  @transactional()
285
299
  def delete_expired_environments(
@@ -380,8 +394,9 @@ class EngineAdapterStateSync(StateSync):
380
394
  start: TimeLike,
381
395
  end: TimeLike,
382
396
  is_dev: bool = False,
397
+ last_altered_ts: t.Optional[int] = None,
383
398
  ) -> None:
384
- super().add_interval(snapshot, start, end, is_dev)
399
+ super().add_interval(snapshot, start, end, is_dev, last_altered_ts)
385
400
 
386
401
  @transactional()
387
402
  def add_snapshots_intervals(self, snapshots_intervals: t.Sequence[SnapshotIntervals]) -> None:
@@ -407,7 +422,7 @@ class EngineAdapterStateSync(StateSync):
407
422
  @transactional()
408
423
  def remove_intervals(
409
424
  self,
410
- snapshot_intervals: t.Sequence[t.Tuple[SnapshotInfoLike, Interval]],
425
+ snapshot_intervals: t.Sequence[t.Tuple[SnapshotIdAndVersionLike, Interval]],
411
426
  remove_shared_versions: bool = False,
412
427
  ) -> None:
413
428
  self.interval_state.remove_intervals(snapshot_intervals, remove_shared_versions)
@@ -454,7 +469,7 @@ class EngineAdapterStateSync(StateSync):
454
469
  ) -> None:
455
470
  """Migrate the state sync to the latest SQLMesh / SQLGlot version."""
456
471
  self.migrator.migrate(
457
- self,
472
+ self.schema,
458
473
  skip_backup=skip_backup,
459
474
  promoted_snapshots_only=promoted_snapshots_only,
460
475
  )