sqlmesh 0.217.1.dev1__py3-none-any.whl → 0.227.2.dev4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (183)
  1. sqlmesh/__init__.py +12 -2
  2. sqlmesh/_version.py +2 -2
  3. sqlmesh/cli/project_init.py +10 -2
  4. sqlmesh/core/_typing.py +1 -0
  5. sqlmesh/core/audit/definition.py +8 -2
  6. sqlmesh/core/config/__init__.py +1 -1
  7. sqlmesh/core/config/connection.py +17 -5
  8. sqlmesh/core/config/dbt.py +13 -0
  9. sqlmesh/core/config/janitor.py +12 -0
  10. sqlmesh/core/config/loader.py +7 -0
  11. sqlmesh/core/config/model.py +2 -0
  12. sqlmesh/core/config/root.py +3 -0
  13. sqlmesh/core/console.py +80 -2
  14. sqlmesh/core/constants.py +1 -1
  15. sqlmesh/core/context.py +61 -25
  16. sqlmesh/core/dialect.py +3 -0
  17. sqlmesh/core/engine_adapter/_typing.py +2 -0
  18. sqlmesh/core/engine_adapter/base.py +322 -22
  19. sqlmesh/core/engine_adapter/base_postgres.py +17 -1
  20. sqlmesh/core/engine_adapter/bigquery.py +146 -7
  21. sqlmesh/core/engine_adapter/clickhouse.py +17 -13
  22. sqlmesh/core/engine_adapter/databricks.py +33 -2
  23. sqlmesh/core/engine_adapter/fabric.py +1 -29
  24. sqlmesh/core/engine_adapter/mixins.py +142 -48
  25. sqlmesh/core/engine_adapter/mssql.py +15 -4
  26. sqlmesh/core/engine_adapter/mysql.py +2 -2
  27. sqlmesh/core/engine_adapter/postgres.py +9 -3
  28. sqlmesh/core/engine_adapter/redshift.py +4 -0
  29. sqlmesh/core/engine_adapter/risingwave.py +1 -0
  30. sqlmesh/core/engine_adapter/shared.py +6 -0
  31. sqlmesh/core/engine_adapter/snowflake.py +82 -11
  32. sqlmesh/core/engine_adapter/spark.py +14 -10
  33. sqlmesh/core/engine_adapter/trino.py +4 -2
  34. sqlmesh/core/janitor.py +181 -0
  35. sqlmesh/core/lineage.py +1 -0
  36. sqlmesh/core/macros.py +35 -13
  37. sqlmesh/core/model/common.py +2 -0
  38. sqlmesh/core/model/definition.py +65 -4
  39. sqlmesh/core/model/kind.py +66 -2
  40. sqlmesh/core/model/meta.py +107 -2
  41. sqlmesh/core/node.py +101 -2
  42. sqlmesh/core/plan/builder.py +15 -10
  43. sqlmesh/core/plan/common.py +196 -2
  44. sqlmesh/core/plan/definition.py +21 -6
  45. sqlmesh/core/plan/evaluator.py +72 -113
  46. sqlmesh/core/plan/explainer.py +90 -8
  47. sqlmesh/core/plan/stages.py +42 -21
  48. sqlmesh/core/renderer.py +26 -18
  49. sqlmesh/core/scheduler.py +60 -19
  50. sqlmesh/core/selector.py +137 -9
  51. sqlmesh/core/signal.py +64 -1
  52. sqlmesh/core/snapshot/__init__.py +1 -0
  53. sqlmesh/core/snapshot/definition.py +109 -25
  54. sqlmesh/core/snapshot/evaluator.py +610 -50
  55. sqlmesh/core/state_sync/__init__.py +0 -1
  56. sqlmesh/core/state_sync/base.py +31 -27
  57. sqlmesh/core/state_sync/cache.py +12 -4
  58. sqlmesh/core/state_sync/common.py +216 -111
  59. sqlmesh/core/state_sync/db/facade.py +30 -15
  60. sqlmesh/core/state_sync/db/interval.py +27 -7
  61. sqlmesh/core/state_sync/db/migrator.py +14 -8
  62. sqlmesh/core/state_sync/db/snapshot.py +119 -87
  63. sqlmesh/core/table_diff.py +2 -2
  64. sqlmesh/core/test/definition.py +14 -9
  65. sqlmesh/dbt/adapter.py +20 -11
  66. sqlmesh/dbt/basemodel.py +52 -41
  67. sqlmesh/dbt/builtin.py +27 -11
  68. sqlmesh/dbt/column.py +17 -5
  69. sqlmesh/dbt/common.py +4 -2
  70. sqlmesh/dbt/context.py +14 -1
  71. sqlmesh/dbt/loader.py +60 -8
  72. sqlmesh/dbt/manifest.py +136 -8
  73. sqlmesh/dbt/model.py +105 -25
  74. sqlmesh/dbt/package.py +16 -1
  75. sqlmesh/dbt/profile.py +3 -3
  76. sqlmesh/dbt/project.py +12 -7
  77. sqlmesh/dbt/seed.py +1 -1
  78. sqlmesh/dbt/source.py +6 -1
  79. sqlmesh/dbt/target.py +25 -6
  80. sqlmesh/dbt/test.py +31 -1
  81. sqlmesh/migrations/v0000_baseline.py +3 -6
  82. sqlmesh/migrations/v0061_mysql_fix_blob_text_type.py +2 -5
  83. sqlmesh/migrations/v0062_add_model_gateway.py +2 -2
  84. sqlmesh/migrations/v0063_change_signals.py +2 -4
  85. sqlmesh/migrations/v0064_join_when_matched_strings.py +2 -4
  86. sqlmesh/migrations/v0065_add_model_optimize.py +2 -2
  87. sqlmesh/migrations/v0066_add_auto_restatements.py +2 -6
  88. sqlmesh/migrations/v0067_add_tsql_date_full_precision.py +2 -2
  89. sqlmesh/migrations/v0068_include_unrendered_query_in_metadata_hash.py +2 -2
  90. sqlmesh/migrations/v0069_update_dev_table_suffix.py +2 -4
  91. sqlmesh/migrations/v0070_include_grains_in_metadata_hash.py +2 -2
  92. sqlmesh/migrations/v0071_add_dev_version_to_intervals.py +2 -6
  93. sqlmesh/migrations/v0072_add_environment_statements.py +2 -4
  94. sqlmesh/migrations/v0073_remove_symbolic_disable_restatement.py +2 -4
  95. sqlmesh/migrations/v0074_add_partition_by_time_column_property.py +2 -2
  96. sqlmesh/migrations/v0075_remove_validate_query.py +2 -4
  97. sqlmesh/migrations/v0076_add_cron_tz.py +2 -2
  98. sqlmesh/migrations/v0077_fix_column_type_hash_calculation.py +2 -2
  99. sqlmesh/migrations/v0078_warn_if_non_migratable_python_env.py +2 -4
  100. sqlmesh/migrations/v0079_add_gateway_managed_property.py +7 -9
  101. sqlmesh/migrations/v0080_add_batch_size_to_scd_type_2_models.py +2 -2
  102. sqlmesh/migrations/v0081_update_partitioned_by.py +2 -4
  103. sqlmesh/migrations/v0082_warn_if_incorrectly_duplicated_statements.py +2 -4
  104. sqlmesh/migrations/v0083_use_sql_for_scd_time_data_type_data_hash.py +2 -2
  105. sqlmesh/migrations/v0084_normalize_quote_when_matched_and_merge_filter.py +2 -2
  106. sqlmesh/migrations/v0085_deterministic_repr.py +2 -4
  107. sqlmesh/migrations/v0086_check_deterministic_bug.py +2 -4
  108. sqlmesh/migrations/v0087_normalize_blueprint_variables.py +2 -4
  109. sqlmesh/migrations/v0088_warn_about_variable_python_env_diffs.py +2 -4
  110. sqlmesh/migrations/v0089_add_virtual_environment_mode.py +2 -2
  111. sqlmesh/migrations/v0090_add_forward_only_column.py +2 -6
  112. sqlmesh/migrations/v0091_on_additive_change.py +2 -2
  113. sqlmesh/migrations/v0092_warn_about_dbt_data_type_diff.py +2 -4
  114. sqlmesh/migrations/v0093_use_raw_sql_in_fingerprint.py +2 -2
  115. sqlmesh/migrations/v0094_add_dev_version_and_fingerprint_columns.py +2 -6
  116. sqlmesh/migrations/v0095_warn_about_dbt_raw_sql_diff.py +2 -4
  117. sqlmesh/migrations/v0096_remove_plan_dags_table.py +2 -4
  118. sqlmesh/migrations/v0097_add_dbt_name_in_node.py +2 -2
  119. sqlmesh/migrations/v0098_add_dbt_node_info_in_node.py +103 -0
  120. sqlmesh/migrations/v0099_add_last_altered_to_intervals.py +25 -0
  121. sqlmesh/migrations/v0100_add_grants_and_grants_target_layer.py +9 -0
  122. sqlmesh/utils/__init__.py +8 -1
  123. sqlmesh/utils/cache.py +5 -1
  124. sqlmesh/utils/date.py +1 -1
  125. sqlmesh/utils/errors.py +4 -0
  126. sqlmesh/utils/jinja.py +25 -2
  127. sqlmesh/utils/pydantic.py +6 -6
  128. sqlmesh/utils/windows.py +13 -3
  129. {sqlmesh-0.217.1.dev1.dist-info → sqlmesh-0.227.2.dev4.dist-info}/METADATA +5 -5
  130. {sqlmesh-0.217.1.dev1.dist-info → sqlmesh-0.227.2.dev4.dist-info}/RECORD +181 -176
  131. sqlmesh_dbt/cli.py +70 -7
  132. sqlmesh_dbt/console.py +14 -6
  133. sqlmesh_dbt/operations.py +103 -24
  134. sqlmesh_dbt/selectors.py +39 -1
  135. web/client/dist/assets/{Audits-Ucsx1GzF.js → Audits-CBiYyyx-.js} +1 -1
  136. web/client/dist/assets/{Banner-BWDzvavM.js → Banner-DSRbUlO5.js} +1 -1
  137. web/client/dist/assets/{ChevronDownIcon-D2VL13Ah.js → ChevronDownIcon-MK_nrjD_.js} +1 -1
  138. web/client/dist/assets/{ChevronRightIcon-DWGYbf1l.js → ChevronRightIcon-CLWtT22Q.js} +1 -1
  139. web/client/dist/assets/{Content-DdHDZM3I.js → Content-BNuGZN5l.js} +1 -1
  140. web/client/dist/assets/{Content-Bikfy8fh.js → Content-CSHJyW0n.js} +1 -1
  141. web/client/dist/assets/{Data-CzAJH7rW.js → Data-C1oRDbLx.js} +1 -1
  142. web/client/dist/assets/{DataCatalog-BJF11g8f.js → DataCatalog-HXyX2-_j.js} +1 -1
  143. web/client/dist/assets/{Editor-s0SBpV2y.js → Editor-BDyfpUuw.js} +1 -1
  144. web/client/dist/assets/{Editor-DgLhgKnm.js → Editor-D0jNItwC.js} +1 -1
  145. web/client/dist/assets/{Errors-D0m0O1d3.js → Errors-BfuFLcPi.js} +1 -1
  146. web/client/dist/assets/{FileExplorer-CEv0vXkt.js → FileExplorer-BR9IE3he.js} +1 -1
  147. web/client/dist/assets/{Footer-BwzXn8Ew.js → Footer-CgBEtiAh.js} +1 -1
  148. web/client/dist/assets/{Header-6heDkEqG.js → Header-DSqR6nSO.js} +1 -1
  149. web/client/dist/assets/{Input-obuJsD6k.js → Input-B-oZ6fGO.js} +1 -1
  150. web/client/dist/assets/Lineage-DYQVwDbD.js +1 -0
  151. web/client/dist/assets/{ListboxShow-HM9_qyrt.js → ListboxShow-BE5-xevs.js} +1 -1
  152. web/client/dist/assets/{ModelLineage-zWdKo0U2.js → ModelLineage-DkIFAYo4.js} +1 -1
  153. web/client/dist/assets/{Models-Bcu66SRz.js → Models-D5dWr8RB.js} +1 -1
  154. web/client/dist/assets/{Page-BWEEQfIt.js → Page-C-XfU5BR.js} +1 -1
  155. web/client/dist/assets/{Plan-C4gXCqlf.js → Plan-ZEuTINBq.js} +1 -1
  156. web/client/dist/assets/{PlusCircleIcon-CVDO651q.js → PlusCircleIcon-DVXAHG8_.js} +1 -1
  157. web/client/dist/assets/{ReportErrors-BT6xFwAr.js → ReportErrors-B7FEPzMB.js} +1 -1
  158. web/client/dist/assets/{Root-ryJoBK4h.js → Root-8aZyhPxF.js} +1 -1
  159. web/client/dist/assets/{SearchList-DB04sPb9.js → SearchList-W_iT2G82.js} +1 -1
  160. web/client/dist/assets/{SelectEnvironment-CUYcXUu6.js → SelectEnvironment-C65jALmO.js} +1 -1
  161. web/client/dist/assets/{SourceList-Doo_9ZGp.js → SourceList-DSLO6nVJ.js} +1 -1
  162. web/client/dist/assets/{SourceListItem-D5Mj7Dly.js → SourceListItem-BHt8d9-I.js} +1 -1
  163. web/client/dist/assets/{SplitPane-qHmkD1qy.js → SplitPane-CViaZmw6.js} +1 -1
  164. web/client/dist/assets/{Tests-DH1Z74ML.js → Tests-DhaVt5t1.js} +1 -1
  165. web/client/dist/assets/{Welcome-DqUJUNMF.js → Welcome-DvpjH-_4.js} +1 -1
  166. web/client/dist/assets/context-BctCsyGb.js +71 -0
  167. web/client/dist/assets/{context-Dr54UHLi.js → context-DFNeGsFF.js} +1 -1
  168. web/client/dist/assets/{editor-DYIP1yQ4.js → editor-CcO28cqd.js} +1 -1
  169. web/client/dist/assets/{file-DarlIDVi.js → file-CvJN3aZO.js} +1 -1
  170. web/client/dist/assets/{floating-ui.react-dom-BH3TFvkM.js → floating-ui.react-dom-CjE-JNW1.js} +1 -1
  171. web/client/dist/assets/{help-Bl8wqaQc.js → help-DuPhjipa.js} +1 -1
  172. web/client/dist/assets/{index-D1sR7wpN.js → index-C-dJH7yZ.js} +1 -1
  173. web/client/dist/assets/{index-O3mjYpnE.js → index-Dj0i1-CA.js} +2 -2
  174. web/client/dist/assets/{plan-CehRrJUG.js → plan-BTRSbjKn.js} +1 -1
  175. web/client/dist/assets/{popover-CqgMRE0G.js → popover-_Sf0yvOI.js} +1 -1
  176. web/client/dist/assets/{project-6gxepOhm.js → project-BvSOI8MY.js} +1 -1
  177. web/client/dist/index.html +1 -1
  178. web/client/dist/assets/Lineage-D0Hgdz2v.js +0 -1
  179. web/client/dist/assets/context-DgX0fp2E.js +0 -68
  180. {sqlmesh-0.217.1.dev1.dist-info → sqlmesh-0.227.2.dev4.dist-info}/WHEEL +0 -0
  181. {sqlmesh-0.217.1.dev1.dist-info → sqlmesh-0.227.2.dev4.dist-info}/entry_points.txt +0 -0
  182. {sqlmesh-0.217.1.dev1.dist-info → sqlmesh-0.227.2.dev4.dist-info}/licenses/LICENSE +0 -0
  183. {sqlmesh-0.217.1.dev1.dist-info → sqlmesh-0.227.2.dev4.dist-info}/top_level.txt +0 -0
sqlmesh/core/node.py CHANGED
@@ -153,6 +153,101 @@ class IntervalUnit(str, Enum):
153
153
  return self.seconds * 1000
154
154
 
155
155
 
156
+ class DbtNodeInfo(PydanticModel):
157
+ """
158
+ Represents dbt-specific model information set by the dbt loader and intended to be made available at the Snapshot level
159
+ (as opposed to hidden within the individual model jinja macro registries).
160
+
161
+ This allows for things like injecting implementations of variables / functions into the Jinja context that are compatible with
162
+ their dbt equivalents but are backed by the sqlmesh snapshots in any given plan / environment
163
+ """
164
+
165
+ unique_id: str
166
+ """This is the node/resource name/unique_id that's used as the node key in the dbt manifest.
167
+ It's prefixed by the resource type and is exposed in context variables like {{ selected_resources }}.
168
+
169
+ Examples:
170
+ - test.jaffle_shop.unique_stg_orders_order_id.e3b841c71a
171
+ - seed.jaffle_shop.raw_payments
172
+ - model.jaffle_shop.stg_orders
173
+ """
174
+
175
+ name: str
176
+ """Name of this object in the dbt global namespace, used by things like {{ ref() }} calls.
177
+
178
+ Examples:
179
+ - unique_stg_orders_order_id
180
+ - raw_payments
181
+ - stg_orders
182
+ """
183
+
184
+ fqn: str
185
+ """Used for selectors in --select/--exclude.
186
+ Takes the filesystem into account so may be structured differently to :unique_id.
187
+
188
+ Examples:
189
+ - jaffle_shop.staging.unique_stg_orders_order_id
190
+ - jaffle_shop.raw_payments
191
+ - jaffle_shop.staging.stg_orders
192
+ """
193
+
194
+ alias: t.Optional[str] = None
195
+ """This is dbt's way of overriding the _physical table_ a model is written to.
196
+
197
+ It's used in the following situation:
198
+ - Say you have two models, "stg_customers" and "customers"
199
+ - You want "stg_customers" to be written to the "staging" schema as eg "staging.customers" - NOT "staging.stg_customers"
200
+ - But you cant rename the file to "customers" because it will conflict with your other model file "customers"
201
+ - Even if you put it in a different folder, eg "staging/customers.sql" - dbt still has a global namespace so it will conflict
202
+ when you try to do something like "{{ ref('customers') }}"
203
+ - So dbt's solution to this problem is to keep calling it "stg_customers" at the dbt project/model level,
204
+ but allow overriding the physical table to "customers" via something like "{{ config(alias='customers', schema='staging') }}"
205
+
206
+ Note that if :alias is set, it does *not* replace :name at the model level and cannot be used interchangeably with :name.
207
+ It also does not affect the :fqn or :unique_id. It's just used to override :name when it comes time to generate the physical table name.
208
+ """
209
+
210
+ @model_validator(mode="after")
211
+ def post_init(self) -> Self:
212
+ # by default, dbt sets alias to the same as :name
213
+ # however, we only want to include :alias if it is actually different / actually providing an override
214
+ if self.alias == self.name:
215
+ self.alias = None
216
+ return self
217
+
218
+ def to_expression(self) -> exp.Expression:
219
+ """Produce a SQLGlot expression representing this object, for use in things like the model/audit definition renderers"""
220
+ return exp.tuple_(
221
+ *(
222
+ exp.PropertyEQ(this=exp.var(k), expression=exp.Literal.string(v))
223
+ for k, v in sorted(self.model_dump(exclude_none=True).items())
224
+ )
225
+ )
226
+
227
+
228
+ class DbtInfoMixin:
229
+ """This mixin encapsulates properties that only exist for dbt compatibility and are otherwise not required
230
+ for native projects"""
231
+
232
+ @property
233
+ def dbt_node_info(self) -> t.Optional[DbtNodeInfo]:
234
+ raise NotImplementedError()
235
+
236
+ @property
237
+ def dbt_unique_id(self) -> t.Optional[str]:
238
+ """Used for compatibility with jinja context variables such as {{ selected_resources }}"""
239
+ if self.dbt_node_info:
240
+ return self.dbt_node_info.unique_id
241
+ return None
242
+
243
+ @property
244
+ def dbt_fqn(self) -> t.Optional[str]:
245
+ """Used in the selector engine for compatibility with selectors that select models by dbt fqn"""
246
+ if self.dbt_node_info:
247
+ return self.dbt_node_info.fqn
248
+ return None
249
+
250
+
156
251
  # this must be sorted in descending order
157
252
  INTERVAL_SECONDS = {
158
253
  IntervalUnit.YEAR: 60 * 60 * 24 * 365,
@@ -165,7 +260,7 @@ INTERVAL_SECONDS = {
165
260
  }
166
261
 
167
262
 
168
- class _Node(PydanticModel):
263
+ class _Node(DbtInfoMixin, PydanticModel):
169
264
  """
170
265
  Node is the core abstraction for entity that can be executed within the scheduler.
171
266
 
@@ -199,7 +294,7 @@ class _Node(PydanticModel):
199
294
  interval_unit_: t.Optional[IntervalUnit] = Field(alias="interval_unit", default=None)
200
295
  tags: t.List[str] = []
201
296
  stamp: t.Optional[str] = None
202
- dbt_name: t.Optional[str] = None # dbt node name
297
+ dbt_node_info_: t.Optional[DbtNodeInfo] = Field(alias="dbt_node_info", default=None)
203
298
  _path: t.Optional[Path] = None
204
299
  _data_hash: t.Optional[str] = None
205
300
  _metadata_hash: t.Optional[str] = None
@@ -446,6 +541,10 @@ class _Node(PydanticModel):
446
541
  """Return True if this is an audit node"""
447
542
  return False
448
543
 
544
+ @property
545
+ def dbt_node_info(self) -> t.Optional[DbtNodeInfo]:
546
+ return self.dbt_node_info_
547
+
449
548
 
450
549
  class NodeType(str, Enum):
451
550
  MODEL = "model"
@@ -65,6 +65,9 @@ class PlanBuilder:
65
65
  restate_models: A list of models for which the data should be restated for the time range
66
66
  specified in this plan. Note: models defined outside SQLMesh (external) won't be a part
67
67
  of the restatement.
68
+ restate_all_snapshots: If restatements are present, this flag indicates whether or not the intervals
69
+ being restated should be cleared from state for other versions of this model (typically, versions that are present in other environments).
70
+ If set to None, the default behaviour is to not clear anything unless the target environment is prod.
68
71
  backfill_models: A list of fully qualified model names for which the data should be backfilled as part of this plan.
69
72
  no_gaps: Whether to ensure that new snapshots for nodes that are already a
70
73
  part of the target environment have no data gaps when compared against previous
@@ -103,6 +106,7 @@ class PlanBuilder:
103
106
  execution_time: t.Optional[TimeLike] = None,
104
107
  apply: t.Optional[t.Callable[[Plan], None]] = None,
105
108
  restate_models: t.Optional[t.Iterable[str]] = None,
109
+ restate_all_snapshots: bool = False,
106
110
  backfill_models: t.Optional[t.Iterable[str]] = None,
107
111
  no_gaps: bool = False,
108
112
  skip_backfill: bool = False,
@@ -154,6 +158,7 @@ class PlanBuilder:
154
158
  self._auto_categorization_enabled = auto_categorization_enabled
155
159
  self._include_unmodified = include_unmodified
156
160
  self._restate_models = set(restate_models) if restate_models is not None else None
161
+ self._restate_all_snapshots = restate_all_snapshots
157
162
  self._effective_from = effective_from
158
163
 
159
164
  # note: this deliberately doesn't default to now() here.
@@ -277,7 +282,6 @@ class PlanBuilder:
277
282
  if self._latest_plan:
278
283
  return self._latest_plan
279
284
 
280
- self._ensure_no_new_snapshots_with_restatements()
281
285
  self._ensure_new_env_with_changes()
282
286
  self._ensure_valid_date_range()
283
287
  self._ensure_no_broken_references()
@@ -338,7 +342,9 @@ class PlanBuilder:
338
342
  directly_modified=directly_modified,
339
343
  indirectly_modified=indirectly_modified,
340
344
  deployability_index=deployability_index,
345
+ selected_models_to_restate=self._restate_models,
341
346
  restatements=restatements,
347
+ restate_all_snapshots=self._restate_all_snapshots,
342
348
  start_override_per_model=self._start_override_per_model,
343
349
  end_override_per_model=end_override_per_model,
344
350
  selected_models_to_backfill=self._backfill_models,
@@ -674,6 +680,14 @@ class PlanBuilder:
674
680
  if mode == AutoCategorizationMode.FULL:
675
681
  snapshot.categorize_as(SnapshotChangeCategory.BREAKING, forward_only)
676
682
  elif self._context_diff.indirectly_modified(snapshot.name):
683
+ if snapshot.is_materialized_view and not forward_only:
684
+ # We categorize changes as breaking to allow for instantaneous switches in a virtual layer.
685
+ # Otherwise, there might be a potentially long downtime during MVs recreation.
686
+ # In the case of forward-only changes this optimization is not applicable because we want to continue
687
+ # using the same (existing) table version.
688
+ snapshot.categorize_as(SnapshotChangeCategory.INDIRECT_BREAKING, forward_only)
689
+ return
690
+
677
691
  all_upstream_forward_only = set()
678
692
  all_upstream_categories = set()
679
693
  direct_parent_categories = set()
@@ -858,15 +872,6 @@ class PlanBuilder:
858
872
  f"""Removed {broken_references_msg} are referenced in '{snapshot.name}'. Please remove broken references before proceeding."""
859
873
  )
860
874
 
861
- def _ensure_no_new_snapshots_with_restatements(self) -> None:
862
- if self._restate_models is not None and (
863
- self._context_diff.new_snapshots or self._context_diff.modified_snapshots
864
- ):
865
- raise PlanError(
866
- "Model changes and restatements can't be a part of the same plan. "
867
- "Revert or apply changes before proceeding with restatements."
868
- )
869
-
870
875
  def _ensure_new_env_with_changes(self) -> None:
871
876
  if (
872
877
  self._is_dev
@@ -1,19 +1,40 @@
1
1
  from __future__ import annotations
2
+ import typing as t
3
+ import logging
4
+ from dataclasses import dataclass, field
2
5
 
3
- from sqlmesh.core.snapshot import Snapshot
6
+ from sqlmesh.core.state_sync import StateReader
7
+ from sqlmesh.core.snapshot import Snapshot, SnapshotId, SnapshotIdAndVersion, SnapshotNameVersion
8
+ from sqlmesh.core.snapshot.definition import Interval
9
+ from sqlmesh.utils.dag import DAG
10
+ from sqlmesh.utils.date import now_timestamp
11
+
12
+ logger = logging.getLogger(__name__)
4
13
 
5
14
 
6
15
  def should_force_rebuild(old: Snapshot, new: Snapshot) -> bool:
7
16
  if new.is_view and new.is_indirect_non_breaking and not new.is_forward_only:
8
17
  # View models always need to be rebuilt to reflect updated upstream dependencies
9
18
  return True
10
- if new.is_seed:
19
+ if new.is_seed and not (
20
+ new.is_metadata
21
+ and new.previous_version
22
+ and new.previous_version.snapshot_id(new.name) == old.snapshot_id
23
+ ):
11
24
  # Seed models always need to be rebuilt to reflect changes in the seed file
25
+ # Unless only their metadata has been updated (eg description added) and the seed file has not been touched
12
26
  return True
13
27
  return is_breaking_kind_change(old, new)
14
28
 
15
29
 
16
30
  def is_breaking_kind_change(old: Snapshot, new: Snapshot) -> bool:
31
+ if new.is_model != old.is_model:
32
+ # If one is a model and the other isn't, then we need to rebuild
33
+ return True
34
+ if not new.is_model or not old.is_model:
35
+ # If neither are models, then we don't need to rebuild
36
+ # Note that the remaining checks only apply to model snapshots
37
+ return False
17
38
  if old.virtual_environment_mode != new.virtual_environment_mode:
18
39
  # If the virtual environment mode has changed, then we need to rebuild
19
40
  return True
@@ -27,3 +48,176 @@ def is_breaking_kind_change(old: Snapshot, new: Snapshot) -> bool:
27
48
  # If the partitioning hasn't changed, then we don't need to rebuild
28
49
  return False
29
50
  return True
51
+
52
+
53
+ @dataclass
54
+ class SnapshotIntervalClearRequest:
55
+ # affected snapshot
56
+ snapshot: SnapshotIdAndVersion
57
+
58
+ # which interval to clear
59
+ interval: Interval
60
+
61
+ # which environments this snapshot is currently promoted
62
+ # note that this can be empty if the snapshot exists because its ttl has not expired
63
+ # but it is not part of any particular environment
64
+ environment_names: t.Set[str] = field(default_factory=set)
65
+
66
+ @property
67
+ def snapshot_id(self) -> SnapshotId:
68
+ return self.snapshot.snapshot_id
69
+
70
+ @property
71
+ def sorted_environment_names(self) -> t.List[str]:
72
+ return list(sorted(self.environment_names))
73
+
74
+
75
+ def identify_restatement_intervals_across_snapshot_versions(
76
+ state_reader: StateReader,
77
+ prod_restatements: t.Dict[str, Interval],
78
+ disable_restatement_models: t.Set[str],
79
+ loaded_snapshots: t.Dict[SnapshotId, Snapshot],
80
+ current_ts: t.Optional[int] = None,
81
+ ) -> t.Dict[SnapshotId, SnapshotIntervalClearRequest]:
82
+ """
83
+ Given a map of snapshot names + intervals to restate in prod:
84
+ - Look up matching snapshots (match based on name - regardless of version, to get all versions)
85
+ - For each match, also match downstream snapshots in each dev environment while filtering out models that have restatement disabled
86
+ - Return a mapping of every affected snapshot id to a clear request holding the interval that needs to be cleared for it
87
+
88
+ The goal here is to produce a list of intervals to invalidate across all dev snapshots so that a subsequent plan or
89
+ cadence run in those environments causes the intervals to be repopulated.
90
+ """
91
+ if not prod_restatements:
92
+ return {}
93
+
94
+ # Although :loaded_snapshots is sourced from RestatementStage.all_snapshots, since the only time we ever need
95
+ # to clear intervals across all environments is for prod, the :loaded_snapshots here are always from prod
96
+ prod_name_versions: t.Set[SnapshotNameVersion] = {
97
+ s.name_version for s in loaded_snapshots.values()
98
+ }
99
+
100
+ snapshot_intervals_to_clear: t.Dict[SnapshotId, SnapshotIntervalClearRequest] = {}
101
+
102
+ for env_summary in state_reader.get_environments_summary():
103
+ # Fetch the full environment object one at a time to avoid loading all environments into memory at once
104
+ env = state_reader.get_environment(env_summary.name)
105
+ if not env:
106
+ logger.warning("Environment %s not found", env_summary.name)
107
+ continue
108
+
109
+ snapshots_by_name = {s.name: s.table_info for s in env.snapshots}
110
+
111
+ # We don't just restate matching snapshots; we also have to restate anything downstream of them
112
+ # so that if A gets restated in prod and dev has A <- B <- C, B and C get restated in dev
113
+ env_dag = DAG({s.name: {p.name for p in s.parents} for s in env.snapshots})
114
+
115
+ for restate_snapshot_name, interval in prod_restatements.items():
116
+ if restate_snapshot_name not in snapshots_by_name:
117
+ # snapshot is not promoted in this environment
118
+ continue
119
+
120
+ affected_snapshot_names = [
121
+ x
122
+ for x in ([restate_snapshot_name] + env_dag.downstream(restate_snapshot_name))
123
+ if x not in disable_restatement_models
124
+ ]
125
+
126
+ for affected_snapshot_name in affected_snapshot_names:
127
+ affected_snapshot = snapshots_by_name[affected_snapshot_name]
128
+
129
+ # Don't clear intervals for a dev snapshot if it shares the same physical version with prod.
130
+ # Otherwise, prod will be affected by what should be a dev operation
131
+ if affected_snapshot.name_version in prod_name_versions:
132
+ continue
133
+
134
+ clear_request = snapshot_intervals_to_clear.get(affected_snapshot.snapshot_id)
135
+ if not clear_request:
136
+ clear_request = SnapshotIntervalClearRequest(
137
+ snapshot=affected_snapshot.id_and_version, interval=interval
138
+ )
139
+ snapshot_intervals_to_clear[affected_snapshot.snapshot_id] = clear_request
140
+
141
+ clear_request.environment_names |= set([env.name])
142
+
143
+ # snapshot_intervals_to_clear now contains the entire hierarchy of affected snapshots based
144
+ # on building the DAG for each environment and including downstream snapshots
145
+ # but, what if there are affected snapshots that aren't part of any environment?
146
+ unique_snapshot_names = set(snapshot_id.name for snapshot_id in snapshot_intervals_to_clear)
147
+
148
+ current_ts = current_ts or now_timestamp()
149
+ all_matching_non_prod_snapshots = {
150
+ s.snapshot_id: s
151
+ for s in state_reader.get_snapshots_by_names(
152
+ snapshot_names=unique_snapshot_names, current_ts=current_ts, exclude_expired=True
153
+ )
154
+ # Don't clear intervals for a snapshot if it shares the same physical version with prod.
155
+ # Otherwise, prod will be affected by what should be a dev operation
156
+ if s.name_version not in prod_name_versions
157
+ }
158
+
159
+ # identify the ones that we haven't picked up yet, which are the ones that don't exist in any environment
160
+ if remaining_snapshot_ids := set(all_matching_non_prod_snapshots).difference(
161
+ snapshot_intervals_to_clear
162
+ ):
163
+ # these snapshot id's exist in isolation and may be related to a downstream dependency of the :prod_restatements,
164
+ # rather than directly related, so we can't simply look up the interval to clear based on :prod_restatements.
165
+ # To figure out the interval that should be cleared, we can match to the existing list based on name
166
+ # and conservatively take the widest interval that shows up
167
+ snapshot_name_to_widest_interval: t.Dict[str, Interval] = {}
168
+ for s_id, clear_request in snapshot_intervals_to_clear.items():
169
+ current_start, current_end = snapshot_name_to_widest_interval.get(
170
+ s_id.name, clear_request.interval
171
+ )
172
+ next_start, next_end = clear_request.interval
173
+
174
+ next_start = min(current_start, next_start)
175
+ next_end = max(current_end, next_end)
176
+
177
+ snapshot_name_to_widest_interval[s_id.name] = (next_start, next_end)
178
+
179
+ for remaining_snapshot_id in remaining_snapshot_ids:
180
+ remaining_snapshot = all_matching_non_prod_snapshots[remaining_snapshot_id]
181
+ snapshot_intervals_to_clear[remaining_snapshot_id] = SnapshotIntervalClearRequest(
182
+ snapshot=remaining_snapshot,
183
+ interval=snapshot_name_to_widest_interval[remaining_snapshot_id.name],
184
+ )
185
+
186
+ # for any affected full_history_restatement_only snapshots, we need to widen the intervals being restated to
187
+ # include the whole time range for that snapshot. This requires a call to state to load the full snapshot record,
188
+ # so we only do it if necessary
189
+ full_history_restatement_snapshot_ids = [
190
+ # FIXME: full_history_restatement_only is just one indicator that the snapshot can only be fully refreshed, the other one is Model.depends_on_self
191
+ # however, to figure out depends_on_self, we have to render all the model queries which, alongside having to fetch full snapshots from state,
192
+ # is problematic in secure environments that are deliberately isolated from arbitrary user code (since rendering a query may require user macros to be present)
193
+ # So for now, these are not considered
194
+ s_id
195
+ for s_id, s in snapshot_intervals_to_clear.items()
196
+ if s.snapshot.full_history_restatement_only
197
+ ]
198
+ if full_history_restatement_snapshot_ids:
199
+ # only load full snapshot records that we haven't already loaded
200
+ additional_snapshots = state_reader.get_snapshots(
201
+ [
202
+ s.snapshot_id
203
+ for s in full_history_restatement_snapshot_ids
204
+ if s.snapshot_id not in loaded_snapshots
205
+ ]
206
+ )
207
+
208
+ all_snapshots = loaded_snapshots | additional_snapshots
209
+
210
+ for full_snapshot_id in full_history_restatement_snapshot_ids:
211
+ full_snapshot = all_snapshots[full_snapshot_id]
212
+ intervals_to_clear = snapshot_intervals_to_clear[full_snapshot_id]
213
+
214
+ original_start, original_end = intervals_to_clear.interval
215
+
216
+ # get_removal_interval() widens intervals if necessary
217
+ new_interval = full_snapshot.get_removal_interval(
218
+ start=original_start, end=original_end
219
+ )
220
+
221
+ intervals_to_clear.interval = new_interval
222
+
223
+ return snapshot_intervals_to_clear
@@ -58,7 +58,18 @@ class Plan(PydanticModel, frozen=True):
58
58
  indirectly_modified: t.Dict[SnapshotId, t.Set[SnapshotId]]
59
59
 
60
60
  deployability_index: DeployabilityIndex
61
+ selected_models_to_restate: t.Optional[t.Set[str]] = None
62
+ """Models that have been explicitly selected for restatement by a user"""
61
63
  restatements: t.Dict[SnapshotId, Interval]
64
+ """
65
+ All models being restated, which are typically the explicitly selected ones + their downstream dependencies.
66
+
67
+ Note that dev previews are also considered restatements, so :selected_models_to_restate can be empty
68
+ while :restatements is still populated with dev previews
69
+ """
70
+ restate_all_snapshots: bool
71
+ """Whether or not to clear intervals from state for other versions of the models listed in :restatements"""
72
+
62
73
  start_override_per_model: t.Optional[t.Dict[str, datetime]]
63
74
  end_override_per_model: t.Optional[t.Dict[str, datetime]]
64
75
 
@@ -202,8 +213,8 @@ class Plan(PydanticModel, frozen=True):
202
213
 
203
214
  snapshots_by_name = self.context_diff.snapshots_by_name
204
215
  snapshots = [s.table_info for s in self.snapshots.values()]
205
- promoted_snapshot_ids = None
206
- if self.is_dev and not self.include_unmodified:
216
+ promotable_snapshot_ids = None
217
+ if self.is_dev:
207
218
  if self.selected_models_to_backfill is not None:
208
219
  # Only promote models that have been explicitly selected for backfill.
209
220
  promotable_snapshot_ids = {
@@ -214,12 +225,14 @@ class Plan(PydanticModel, frozen=True):
214
225
  if m in snapshots_by_name
215
226
  ],
216
227
  }
217
- else:
228
+ elif not self.include_unmodified:
218
229
  promotable_snapshot_ids = self.context_diff.promotable_snapshot_ids.copy()
219
230
 
220
- promoted_snapshot_ids = [
221
- s.snapshot_id for s in snapshots if s.snapshot_id in promotable_snapshot_ids
222
- ]
231
+ promoted_snapshot_ids = (
232
+ [s.snapshot_id for s in snapshots if s.snapshot_id in promotable_snapshot_ids]
233
+ if promotable_snapshot_ids is not None
234
+ else None
235
+ )
223
236
 
224
237
  previous_finalized_snapshots = (
225
238
  self.context_diff.environment_snapshots
@@ -259,6 +272,7 @@ class Plan(PydanticModel, frozen=True):
259
272
  skip_backfill=self.skip_backfill,
260
273
  empty_backfill=self.empty_backfill,
261
274
  restatements={s.name: i for s, i in self.restatements.items()},
275
+ restate_all_snapshots=self.restate_all_snapshots,
262
276
  is_dev=self.is_dev,
263
277
  allow_destructive_models=self.allow_destructive_models,
264
278
  allow_additive_models=self.allow_additive_models,
@@ -303,6 +317,7 @@ class EvaluatablePlan(PydanticModel):
303
317
  skip_backfill: bool
304
318
  empty_backfill: bool
305
319
  restatements: t.Dict[str, Interval]
320
+ restate_all_snapshots: bool
306
321
  is_dev: bool
307
322
  allow_destructive_models: t.Set[str]
308
323
  allow_additive_models: t.Set[str]