sqlmesh 0.217.1.dev1__py3-none-any.whl → 0.227.2.dev20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (190)
  1. sqlmesh/__init__.py +12 -2
  2. sqlmesh/_version.py +2 -2
  3. sqlmesh/cli/project_init.py +10 -2
  4. sqlmesh/core/_typing.py +1 -0
  5. sqlmesh/core/audit/definition.py +8 -2
  6. sqlmesh/core/config/__init__.py +1 -1
  7. sqlmesh/core/config/connection.py +20 -5
  8. sqlmesh/core/config/dbt.py +13 -0
  9. sqlmesh/core/config/janitor.py +12 -0
  10. sqlmesh/core/config/loader.py +7 -0
  11. sqlmesh/core/config/model.py +2 -0
  12. sqlmesh/core/config/root.py +3 -0
  13. sqlmesh/core/console.py +80 -2
  14. sqlmesh/core/constants.py +1 -1
  15. sqlmesh/core/context.py +112 -35
  16. sqlmesh/core/dialect.py +3 -0
  17. sqlmesh/core/engine_adapter/_typing.py +2 -0
  18. sqlmesh/core/engine_adapter/base.py +330 -23
  19. sqlmesh/core/engine_adapter/base_postgres.py +17 -1
  20. sqlmesh/core/engine_adapter/bigquery.py +146 -7
  21. sqlmesh/core/engine_adapter/clickhouse.py +17 -13
  22. sqlmesh/core/engine_adapter/databricks.py +50 -2
  23. sqlmesh/core/engine_adapter/fabric.py +110 -29
  24. sqlmesh/core/engine_adapter/mixins.py +142 -48
  25. sqlmesh/core/engine_adapter/mssql.py +15 -4
  26. sqlmesh/core/engine_adapter/mysql.py +2 -2
  27. sqlmesh/core/engine_adapter/postgres.py +9 -3
  28. sqlmesh/core/engine_adapter/redshift.py +4 -0
  29. sqlmesh/core/engine_adapter/risingwave.py +1 -0
  30. sqlmesh/core/engine_adapter/shared.py +6 -0
  31. sqlmesh/core/engine_adapter/snowflake.py +82 -11
  32. sqlmesh/core/engine_adapter/spark.py +14 -10
  33. sqlmesh/core/engine_adapter/trino.py +5 -2
  34. sqlmesh/core/janitor.py +181 -0
  35. sqlmesh/core/lineage.py +1 -0
  36. sqlmesh/core/linter/rules/builtin.py +15 -0
  37. sqlmesh/core/loader.py +17 -30
  38. sqlmesh/core/macros.py +35 -13
  39. sqlmesh/core/model/common.py +2 -0
  40. sqlmesh/core/model/definition.py +72 -4
  41. sqlmesh/core/model/kind.py +66 -2
  42. sqlmesh/core/model/meta.py +107 -2
  43. sqlmesh/core/node.py +101 -2
  44. sqlmesh/core/plan/builder.py +15 -10
  45. sqlmesh/core/plan/common.py +196 -2
  46. sqlmesh/core/plan/definition.py +21 -6
  47. sqlmesh/core/plan/evaluator.py +72 -113
  48. sqlmesh/core/plan/explainer.py +90 -8
  49. sqlmesh/core/plan/stages.py +42 -21
  50. sqlmesh/core/renderer.py +26 -18
  51. sqlmesh/core/scheduler.py +60 -19
  52. sqlmesh/core/selector.py +137 -9
  53. sqlmesh/core/signal.py +64 -1
  54. sqlmesh/core/snapshot/__init__.py +1 -0
  55. sqlmesh/core/snapshot/definition.py +109 -25
  56. sqlmesh/core/snapshot/evaluator.py +610 -50
  57. sqlmesh/core/state_sync/__init__.py +0 -1
  58. sqlmesh/core/state_sync/base.py +31 -27
  59. sqlmesh/core/state_sync/cache.py +12 -4
  60. sqlmesh/core/state_sync/common.py +216 -111
  61. sqlmesh/core/state_sync/db/facade.py +30 -15
  62. sqlmesh/core/state_sync/db/interval.py +27 -7
  63. sqlmesh/core/state_sync/db/migrator.py +14 -8
  64. sqlmesh/core/state_sync/db/snapshot.py +119 -87
  65. sqlmesh/core/table_diff.py +2 -2
  66. sqlmesh/core/test/definition.py +14 -9
  67. sqlmesh/core/test/discovery.py +4 -0
  68. sqlmesh/dbt/adapter.py +20 -11
  69. sqlmesh/dbt/basemodel.py +52 -41
  70. sqlmesh/dbt/builtin.py +27 -11
  71. sqlmesh/dbt/column.py +17 -5
  72. sqlmesh/dbt/common.py +4 -2
  73. sqlmesh/dbt/context.py +14 -1
  74. sqlmesh/dbt/loader.py +60 -8
  75. sqlmesh/dbt/manifest.py +136 -8
  76. sqlmesh/dbt/model.py +105 -25
  77. sqlmesh/dbt/package.py +16 -1
  78. sqlmesh/dbt/profile.py +3 -3
  79. sqlmesh/dbt/project.py +12 -7
  80. sqlmesh/dbt/seed.py +1 -1
  81. sqlmesh/dbt/source.py +6 -1
  82. sqlmesh/dbt/target.py +25 -6
  83. sqlmesh/dbt/test.py +31 -1
  84. sqlmesh/integrations/github/cicd/controller.py +6 -2
  85. sqlmesh/lsp/context.py +4 -2
  86. sqlmesh/magics.py +1 -1
  87. sqlmesh/migrations/v0000_baseline.py +3 -6
  88. sqlmesh/migrations/v0061_mysql_fix_blob_text_type.py +2 -5
  89. sqlmesh/migrations/v0062_add_model_gateway.py +2 -2
  90. sqlmesh/migrations/v0063_change_signals.py +2 -4
  91. sqlmesh/migrations/v0064_join_when_matched_strings.py +2 -4
  92. sqlmesh/migrations/v0065_add_model_optimize.py +2 -2
  93. sqlmesh/migrations/v0066_add_auto_restatements.py +2 -6
  94. sqlmesh/migrations/v0067_add_tsql_date_full_precision.py +2 -2
  95. sqlmesh/migrations/v0068_include_unrendered_query_in_metadata_hash.py +2 -2
  96. sqlmesh/migrations/v0069_update_dev_table_suffix.py +2 -4
  97. sqlmesh/migrations/v0070_include_grains_in_metadata_hash.py +2 -2
  98. sqlmesh/migrations/v0071_add_dev_version_to_intervals.py +2 -6
  99. sqlmesh/migrations/v0072_add_environment_statements.py +2 -4
  100. sqlmesh/migrations/v0073_remove_symbolic_disable_restatement.py +2 -4
  101. sqlmesh/migrations/v0074_add_partition_by_time_column_property.py +2 -2
  102. sqlmesh/migrations/v0075_remove_validate_query.py +2 -4
  103. sqlmesh/migrations/v0076_add_cron_tz.py +2 -2
  104. sqlmesh/migrations/v0077_fix_column_type_hash_calculation.py +2 -2
  105. sqlmesh/migrations/v0078_warn_if_non_migratable_python_env.py +2 -4
  106. sqlmesh/migrations/v0079_add_gateway_managed_property.py +7 -9
  107. sqlmesh/migrations/v0080_add_batch_size_to_scd_type_2_models.py +2 -2
  108. sqlmesh/migrations/v0081_update_partitioned_by.py +2 -4
  109. sqlmesh/migrations/v0082_warn_if_incorrectly_duplicated_statements.py +2 -4
  110. sqlmesh/migrations/v0083_use_sql_for_scd_time_data_type_data_hash.py +2 -2
  111. sqlmesh/migrations/v0084_normalize_quote_when_matched_and_merge_filter.py +2 -2
  112. sqlmesh/migrations/v0085_deterministic_repr.py +2 -4
  113. sqlmesh/migrations/v0086_check_deterministic_bug.py +2 -4
  114. sqlmesh/migrations/v0087_normalize_blueprint_variables.py +2 -4
  115. sqlmesh/migrations/v0088_warn_about_variable_python_env_diffs.py +2 -4
  116. sqlmesh/migrations/v0089_add_virtual_environment_mode.py +2 -2
  117. sqlmesh/migrations/v0090_add_forward_only_column.py +2 -6
  118. sqlmesh/migrations/v0091_on_additive_change.py +2 -2
  119. sqlmesh/migrations/v0092_warn_about_dbt_data_type_diff.py +2 -4
  120. sqlmesh/migrations/v0093_use_raw_sql_in_fingerprint.py +2 -2
  121. sqlmesh/migrations/v0094_add_dev_version_and_fingerprint_columns.py +2 -6
  122. sqlmesh/migrations/v0095_warn_about_dbt_raw_sql_diff.py +2 -4
  123. sqlmesh/migrations/v0096_remove_plan_dags_table.py +2 -4
  124. sqlmesh/migrations/v0097_add_dbt_name_in_node.py +2 -2
  125. sqlmesh/migrations/v0098_add_dbt_node_info_in_node.py +103 -0
  126. sqlmesh/migrations/v0099_add_last_altered_to_intervals.py +25 -0
  127. sqlmesh/migrations/v0100_add_grants_and_grants_target_layer.py +9 -0
  128. sqlmesh/utils/__init__.py +8 -1
  129. sqlmesh/utils/cache.py +5 -1
  130. sqlmesh/utils/date.py +1 -1
  131. sqlmesh/utils/errors.py +4 -0
  132. sqlmesh/utils/git.py +3 -1
  133. sqlmesh/utils/jinja.py +25 -2
  134. sqlmesh/utils/pydantic.py +6 -6
  135. sqlmesh/utils/windows.py +13 -3
  136. {sqlmesh-0.217.1.dev1.dist-info → sqlmesh-0.227.2.dev20.dist-info}/METADATA +5 -5
  137. {sqlmesh-0.217.1.dev1.dist-info → sqlmesh-0.227.2.dev20.dist-info}/RECORD +188 -183
  138. sqlmesh_dbt/cli.py +70 -7
  139. sqlmesh_dbt/console.py +14 -6
  140. sqlmesh_dbt/operations.py +103 -24
  141. sqlmesh_dbt/selectors.py +39 -1
  142. web/client/dist/assets/{Audits-Ucsx1GzF.js → Audits-CBiYyyx-.js} +1 -1
  143. web/client/dist/assets/{Banner-BWDzvavM.js → Banner-DSRbUlO5.js} +1 -1
  144. web/client/dist/assets/{ChevronDownIcon-D2VL13Ah.js → ChevronDownIcon-MK_nrjD_.js} +1 -1
  145. web/client/dist/assets/{ChevronRightIcon-DWGYbf1l.js → ChevronRightIcon-CLWtT22Q.js} +1 -1
  146. web/client/dist/assets/{Content-DdHDZM3I.js → Content-BNuGZN5l.js} +1 -1
  147. web/client/dist/assets/{Content-Bikfy8fh.js → Content-CSHJyW0n.js} +1 -1
  148. web/client/dist/assets/{Data-CzAJH7rW.js → Data-C1oRDbLx.js} +1 -1
  149. web/client/dist/assets/{DataCatalog-BJF11g8f.js → DataCatalog-HXyX2-_j.js} +1 -1
  150. web/client/dist/assets/{Editor-s0SBpV2y.js → Editor-BDyfpUuw.js} +1 -1
  151. web/client/dist/assets/{Editor-DgLhgKnm.js → Editor-D0jNItwC.js} +1 -1
  152. web/client/dist/assets/{Errors-D0m0O1d3.js → Errors-BfuFLcPi.js} +1 -1
  153. web/client/dist/assets/{FileExplorer-CEv0vXkt.js → FileExplorer-BR9IE3he.js} +1 -1
  154. web/client/dist/assets/{Footer-BwzXn8Ew.js → Footer-CgBEtiAh.js} +1 -1
  155. web/client/dist/assets/{Header-6heDkEqG.js → Header-DSqR6nSO.js} +1 -1
  156. web/client/dist/assets/{Input-obuJsD6k.js → Input-B-oZ6fGO.js} +1 -1
  157. web/client/dist/assets/Lineage-DYQVwDbD.js +1 -0
  158. web/client/dist/assets/{ListboxShow-HM9_qyrt.js → ListboxShow-BE5-xevs.js} +1 -1
  159. web/client/dist/assets/{ModelLineage-zWdKo0U2.js → ModelLineage-DkIFAYo4.js} +1 -1
  160. web/client/dist/assets/{Models-Bcu66SRz.js → Models-D5dWr8RB.js} +1 -1
  161. web/client/dist/assets/{Page-BWEEQfIt.js → Page-C-XfU5BR.js} +1 -1
  162. web/client/dist/assets/{Plan-C4gXCqlf.js → Plan-ZEuTINBq.js} +1 -1
  163. web/client/dist/assets/{PlusCircleIcon-CVDO651q.js → PlusCircleIcon-DVXAHG8_.js} +1 -1
  164. web/client/dist/assets/{ReportErrors-BT6xFwAr.js → ReportErrors-B7FEPzMB.js} +1 -1
  165. web/client/dist/assets/{Root-ryJoBK4h.js → Root-8aZyhPxF.js} +1 -1
  166. web/client/dist/assets/{SearchList-DB04sPb9.js → SearchList-W_iT2G82.js} +1 -1
  167. web/client/dist/assets/{SelectEnvironment-CUYcXUu6.js → SelectEnvironment-C65jALmO.js} +1 -1
  168. web/client/dist/assets/{SourceList-Doo_9ZGp.js → SourceList-DSLO6nVJ.js} +1 -1
  169. web/client/dist/assets/{SourceListItem-D5Mj7Dly.js → SourceListItem-BHt8d9-I.js} +1 -1
  170. web/client/dist/assets/{SplitPane-qHmkD1qy.js → SplitPane-CViaZmw6.js} +1 -1
  171. web/client/dist/assets/{Tests-DH1Z74ML.js → Tests-DhaVt5t1.js} +1 -1
  172. web/client/dist/assets/{Welcome-DqUJUNMF.js → Welcome-DvpjH-_4.js} +1 -1
  173. web/client/dist/assets/context-BctCsyGb.js +71 -0
  174. web/client/dist/assets/{context-Dr54UHLi.js → context-DFNeGsFF.js} +1 -1
  175. web/client/dist/assets/{editor-DYIP1yQ4.js → editor-CcO28cqd.js} +1 -1
  176. web/client/dist/assets/{file-DarlIDVi.js → file-CvJN3aZO.js} +1 -1
  177. web/client/dist/assets/{floating-ui.react-dom-BH3TFvkM.js → floating-ui.react-dom-CjE-JNW1.js} +1 -1
  178. web/client/dist/assets/{help-Bl8wqaQc.js → help-DuPhjipa.js} +1 -1
  179. web/client/dist/assets/{index-D1sR7wpN.js → index-C-dJH7yZ.js} +1 -1
  180. web/client/dist/assets/{index-O3mjYpnE.js → index-Dj0i1-CA.js} +2 -2
  181. web/client/dist/assets/{plan-CehRrJUG.js → plan-BTRSbjKn.js} +1 -1
  182. web/client/dist/assets/{popover-CqgMRE0G.js → popover-_Sf0yvOI.js} +1 -1
  183. web/client/dist/assets/{project-6gxepOhm.js → project-BvSOI8MY.js} +1 -1
  184. web/client/dist/index.html +1 -1
  185. web/client/dist/assets/Lineage-D0Hgdz2v.js +0 -1
  186. web/client/dist/assets/context-DgX0fp2E.js +0 -68
  187. {sqlmesh-0.217.1.dev1.dist-info → sqlmesh-0.227.2.dev20.dist-info}/WHEEL +0 -0
  188. {sqlmesh-0.217.1.dev1.dist-info → sqlmesh-0.227.2.dev20.dist-info}/entry_points.txt +0 -0
  189. {sqlmesh-0.217.1.dev1.dist-info → sqlmesh-0.227.2.dev20.dist-info}/licenses/LICENSE +0 -0
  190. {sqlmesh-0.217.1.dev1.dist-info → sqlmesh-0.227.2.dev20.dist-info}/top_level.txt +0 -0
sqlmesh/core/plan/stages.py CHANGED
@@ -12,8 +12,9 @@ from sqlmesh.core.snapshot.definition import (
     Snapshot,
     SnapshotTableInfo,
     SnapshotId,
-    Interval,
+    snapshots_to_dag,
 )
+from sqlmesh.utils.errors import PlanError
 
 
 @dataclass
@@ -98,14 +99,19 @@ class AuditOnlyRunStage:
 
 @dataclass
 class RestatementStage:
-    """Restate intervals for given snapshots.
+    """Clear intervals from state for snapshots in *other* environments, when restatements are requested in prod.
+
+    This stage is effectively a "marker" stage to trigger the plan evaluator to perform the "clear intervals" logic after the BackfillStage has completed.
+    The "clear intervals" logic is executed just-in-time using the latest state available in order to pick up new snapshots that may have
+    been created while the BackfillStage was running, which is why we do not build a list of snapshots to clear at plan time and defer to evaluation time.
+
+    Note that this stage is only present on `prod` plans because dev plans do not need to worry about clearing intervals in other environments.
 
     Args:
-        snapshot_intervals: Intervals to restate.
-        all_snapshots: All snapshots in the plan by name.
+        all_snapshots: All snapshots in the plan by name. Note that this does not include the snapshots from other environments that will get their
+            intervals cleared; it's included here as an optimization to prevent having to re-fetch the current plan's snapshots.
     """
 
-    snapshot_intervals: t.Dict[SnapshotTableInfo, Interval]
     all_snapshots: t.Dict[str, Snapshot]
 
 
@@ -244,6 +250,7 @@ class PlanStagesBuilder:
         stored_snapshots = self.state_reader.get_snapshots(plan.environment.snapshots)
         snapshots = {**new_snapshots, **stored_snapshots}
         snapshots_by_name = {s.name: s for s in snapshots.values()}
+        dag = snapshots_to_dag(snapshots.values())
 
         all_selected_for_backfill_snapshots = {
             s.snapshot_id for s in snapshots.values() if plan.is_selected_for_backfill(s.name)
@@ -261,14 +268,21 @@ class PlanStagesBuilder:
         before_promote_snapshots = {
             s.snapshot_id
             for s in snapshots.values()
-            if deployability_index.is_representative(s)
+            if (deployability_index.is_representative(s) or s.is_seed)
             and plan.is_selected_for_backfill(s.name)
         }
         after_promote_snapshots = all_selected_for_backfill_snapshots - before_promote_snapshots
         deployability_index = DeployabilityIndex.all_deployable()
 
+        snapshot_ids_with_schema_migration = [
+            s.snapshot_id for s in snapshots.values() if s.requires_schema_migration_in_prod
+        ]
+        # Include all upstream dependencies of snapshots that require schema migration to make sure
+        # the upstream tables are created before the schema updates are applied
         snapshots_with_schema_migration = [
-            s for s in snapshots.values() if s.requires_schema_migration_in_prod
+            snapshots[s_id]
+            for s_id in dag.subdag(*snapshot_ids_with_schema_migration)
+            if snapshots[s_id].supports_schema_migration_in_prod
         ]
 
         snapshots_to_intervals = self._missing_intervals(
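
The new `snapshots_with_schema_migration` computation relies on `dag.subdag(...)` returning the selected snapshot ids together with everything upstream of them, per the comment in the hunk above. A toy sketch of that upstream closure (illustrative only; `upstream_closure` is a hypothetical stand-in, not SQLMesh's DAG API):

    from typing import Dict, Iterable, Set

    def upstream_closure(parents: Dict[str, Set[str]], roots: Iterable[str]) -> Set[str]:
        # Collect the roots plus all of their ancestors, walking parent edges.
        seen: Set[str] = set()
        stack = list(roots)
        while stack:
            node = stack.pop()
            if node in seen:
                continue
            seen.add(node)
            stack.extend(parents.get(node, ()))
        return seen

    # upstream_closure({"b": {"a"}, "c": {"b"}}, ["c"]) == {"a", "b", "c"}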
@@ -321,10 +335,6 @@ class PlanStagesBuilder:
         if audit_only_snapshots:
             stages.append(AuditOnlyRunStage(snapshots=list(audit_only_snapshots.values())))
 
-        restatement_stage = self._get_restatement_stage(plan, snapshots_by_name)
-        if restatement_stage:
-            stages.append(restatement_stage)
-
         if missing_intervals_before_promote:
             stages.append(
                 BackfillStage(
@@ -349,6 +359,15 @@ class PlanStagesBuilder:
             )
         )
 
+        # note: the "restatement stage" (which clears intervals in state - it does not actually perform the restatements; that's the backfill stage)
+        # needs to come *after* the backfill stage so that at no time do other plans / runs see empty prod intervals and compete with this plan to try to fill them.
+        # in addition, when we update intervals in state, we only clear intervals from dev snapshots to force dev models to be backfilled based on the new prod data.
+        # we can leave prod intervals alone because by the time this plan finishes, the intervals in state have not actually changed, since restatement replaces
+        # data for existing intervals and does not produce new ones
+        restatement_stage = self._get_restatement_stage(plan, snapshots_by_name)
+        if restatement_stage:
+            stages.append(restatement_stage)
+
         stages.append(
             EnvironmentRecordUpdateStage(
                 no_gaps_snapshot_names={s.name for s in before_promote_snapshots}
@@ -443,16 +462,18 @@ class PlanStagesBuilder:
     def _get_restatement_stage(
         self, plan: EvaluatablePlan, snapshots_by_name: t.Dict[str, Snapshot]
     ) -> t.Optional[RestatementStage]:
-        snapshot_intervals_to_restate = {}
-        for name, interval in plan.restatements.items():
-            restated_snapshot = snapshots_by_name[name]
-            restated_snapshot.remove_interval(interval)
-            snapshot_intervals_to_restate[restated_snapshot.table_info] = interval
-        if not snapshot_intervals_to_restate or plan.is_dev:
-            return None
-        return RestatementStage(
-            snapshot_intervals=snapshot_intervals_to_restate, all_snapshots=snapshots_by_name
-        )
+        if plan.restate_all_snapshots:
+            if plan.is_dev:
+                raise PlanError(
+                    "Clearing intervals from state across dev model versions is only valid for prod plans"
+                )
+
+        if plan.restatements:
+            return RestatementStage(
+                all_snapshots=snapshots_by_name,
+            )
+
+        return None
 
     def _get_physical_layer_update_stage(
         self,
sqlmesh/core/renderer.py CHANGED
@@ -6,7 +6,7 @@ from contextlib import contextmanager
 from functools import partial
 from pathlib import Path
 
-from sqlglot import exp, parse
+from sqlglot import exp, Dialect
 from sqlglot.errors import SqlglotError
 from sqlglot.helper import ensure_list
 from sqlglot.optimizer.annotate_types import annotate_types
@@ -196,7 +196,14 @@ class BaseExpressionRenderer:
             **kwargs,
         }
 
+        if this_model:
+            render_kwargs["this_model"] = this_model
+
+        macro_evaluator.locals.update(render_kwargs)
+
         variables = kwargs.pop("variables", {})
+        if variables:
+            macro_evaluator.locals.setdefault(c.SQLMESH_VARS, {}).update(variables)
 
         expressions = [self._expression]
         if isinstance(self._expression, d.Jinja):
@@ -249,23 +256,24 @@ class BaseExpressionRenderer:
             ) from ex
 
         if rendered_expression.strip():
-            try:
-                expressions = [e for e in parse(rendered_expression, read=self._dialect) if e]
-
-                if not expressions:
-                    raise ConfigError(f"Failed to parse an expression:\n{self._expression}")
-            except Exception as ex:
-                raise ConfigError(
-                    f"Could not parse the rendered jinja at '{self._path}'.\n{ex}"
-                ) from ex
-
-            if this_model:
-                render_kwargs["this_model"] = this_model
-
-            macro_evaluator.locals.update(render_kwargs)
-
-            if variables:
-                macro_evaluator.locals.setdefault(c.SQLMESH_VARS, {}).update(variables)
+            # ensure there is actual SQL and not just comments and non-SQL jinja
+            dialect = Dialect.get_or_raise(self._dialect)
+            tokens = dialect.tokenize(rendered_expression)
+
+            if tokens:
+                try:
+                    expressions = [
+                        e for e in dialect.parser().parse(tokens, rendered_expression) if e
+                    ]
+
+                    if not expressions:
+                        raise ConfigError(
+                            f"Failed to parse an expression:\n{rendered_expression}"
+                        )
+                except Exception as ex:
+                    raise ConfigError(
+                        f"Could not parse the rendered jinja at '{self._path}'.\n{ex}"
+                    ) from ex
 
         for definition in self._macro_definitions:
             try:
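
The renderer now tokenizes before parsing, so a template that renders to only comments or whitespace is skipped rather than treated as a parse failure. A sketch of the same calls in isolation, assuming sqlglot's `Dialect` API behaves as it is used in the hunk above:

    from sqlglot import Dialect

    dialect = Dialect.get_or_raise("duckdb")

    # A rendering that produced nothing but a comment yields no tokens,
    # so the new guard skips parsing instead of raising ConfigError.
    assert dialect.tokenize("-- nothing but a comment") == []

    # Real SQL still tokenizes and parses as before.
    sql = "SELECT 1 AS x"
    tokens = dialect.tokenize(sql)
    expressions = dialect.parser().parse(tokens, sql)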
sqlmesh/core/scheduler.py CHANGED
@@ -251,7 +251,9 @@ class Scheduler:
             **kwargs,
         )
 
-        self.state_sync.add_interval(snapshot, start, end, is_dev=not is_deployable)
+        self.state_sync.add_interval(
+            snapshot, start, end, is_dev=not is_deployable, last_altered_ts=now_timestamp()
+        )
         return audit_results
 
     def run(
@@ -335,6 +337,7 @@ class Scheduler:
         deployability_index: t.Optional[DeployabilityIndex],
         environment_naming_info: EnvironmentNamingInfo,
         dag: t.Optional[DAG[SnapshotId]] = None,
+        is_restatement: bool = False,
     ) -> t.Dict[Snapshot, Intervals]:
         dag = dag or snapshots_to_dag(merged_intervals)
 
@@ -349,7 +352,7 @@ class Scheduler:
             )
             for snapshot, intervals in merged_intervals.items()
         }
-        snapshot_batches = {}
+        snapshot_batches: t.Dict[Snapshot, Intervals] = {}
         all_unready_intervals: t.Dict[str, set[Interval]] = {}
         for snapshot_id in dag:
             if snapshot_id not in snapshot_intervals:
@@ -361,12 +364,22 @@ class Scheduler:
 
             adapter = self.snapshot_evaluator.get_adapter(snapshot.model_gateway)
 
+            parent_intervals: Intervals = []
+            for parent_id in snapshot.parents:
+                parent_snapshot, _ = snapshot_intervals.get(parent_id, (None, []))
+                if not parent_snapshot or parent_snapshot.is_external:
+                    continue
+
+                parent_intervals.extend(snapshot_batches[parent_snapshot])
+
             context = ExecutionContext(
                 adapter,
                 self.snapshots_by_name,
                 deployability_index,
                 default_dialect=adapter.dialect,
                 default_catalog=self.default_catalog,
+                is_restatement=is_restatement,
+                parent_intervals=parent_intervals,
             )
 
             intervals = self._check_ready_intervals(
422
435
  run_environment_statements: bool = False,
423
436
  audit_only: bool = False,
424
437
  auto_restatement_triggers: t.Dict[SnapshotId, t.List[SnapshotId]] = {},
438
+ is_restatement: bool = False,
425
439
  ) -> t.Tuple[t.List[NodeExecutionFailedError[SchedulingUnit]], t.List[SchedulingUnit]]:
426
440
  """Runs precomputed batches of missing intervals.
427
441
 
@@ -455,9 +469,12 @@ class Scheduler:
455
469
  snapshot_dag = full_dag.subdag(*selected_snapshot_ids_set)
456
470
 
457
471
  batched_intervals = self.batch_intervals(
458
- merged_intervals, deployability_index, environment_naming_info, dag=snapshot_dag
472
+ merged_intervals,
473
+ deployability_index,
474
+ environment_naming_info,
475
+ dag=snapshot_dag,
476
+ is_restatement=is_restatement,
459
477
  )
460
-
461
478
  self.console.start_evaluation_progress(
462
479
  batched_intervals,
463
480
  environment_naming_info,
@@ -530,6 +547,10 @@ class Scheduler:
                     execution_time=execution_time,
                 )
             else:
+                # If batch_index > 0, then the target table must exist since the first batch would have created it
+                target_table_exists = (
+                    snapshot.snapshot_id not in snapshots_to_create or node.batch_index > 0
+                )
                 audit_results = self.evaluate(
                     snapshot=snapshot,
                     environment_naming_info=environment_naming_info,
@@ -540,7 +561,7 @@ class Scheduler:
                     batch_index=node.batch_index,
                     allow_destructive_snapshots=allow_destructive_snapshots,
                     allow_additive_snapshots=allow_additive_snapshots,
-                    target_table_exists=snapshot.snapshot_id not in snapshots_to_create,
+                    target_table_exists=target_table_exists,
                     selected_models=selected_models,
                 )
 
@@ -638,6 +659,7 @@ class Scheduler:
         }
         snapshots_to_create = snapshots_to_create or set()
         original_snapshots_to_create = snapshots_to_create.copy()
+        upstream_dependencies_cache: t.Dict[SnapshotId, t.Set[SchedulingUnit]] = {}
 
         snapshot_dag = snapshot_dag or snapshots_to_dag(batches)
         dag = DAG[SchedulingUnit]()
@@ -649,12 +671,15 @@ class Scheduler:
             snapshot = self.snapshots_by_name[snapshot_id.name]
             intervals = intervals_per_snapshot.get(snapshot.name, [])
 
-            upstream_dependencies: t.List[SchedulingUnit] = []
+            upstream_dependencies: t.Set[SchedulingUnit] = set()
 
             for p_sid in snapshot.parents:
-                upstream_dependencies.extend(
+                upstream_dependencies.update(
                     self._find_upstream_dependencies(
-                        p_sid, intervals_per_snapshot, original_snapshots_to_create
+                        p_sid,
+                        intervals_per_snapshot,
+                        original_snapshots_to_create,
+                        upstream_dependencies_cache,
                     )
                 )
 
@@ -705,29 +730,42 @@ class Scheduler:
         parent_sid: SnapshotId,
         intervals_per_snapshot: t.Dict[str, Intervals],
         snapshots_to_create: t.Set[SnapshotId],
-    ) -> t.List[SchedulingUnit]:
+        cache: t.Dict[SnapshotId, t.Set[SchedulingUnit]],
+    ) -> t.Set[SchedulingUnit]:
         if parent_sid not in self.snapshots:
-            return []
+            return set()
+        if parent_sid in cache:
+            return cache[parent_sid]
 
         p_intervals = intervals_per_snapshot.get(parent_sid.name, [])
 
+        parent_node: t.Optional[SchedulingUnit] = None
         if p_intervals:
             if len(p_intervals) > 1:
-                return [DummyNode(snapshot_name=parent_sid.name)]
-            interval = p_intervals[0]
-            return [EvaluateNode(snapshot_name=parent_sid.name, interval=interval, batch_index=0)]
-        if parent_sid in snapshots_to_create:
-            return [CreateNode(snapshot_name=parent_sid.name)]
+                parent_node = DummyNode(snapshot_name=parent_sid.name)
+            else:
+                interval = p_intervals[0]
+                parent_node = EvaluateNode(
+                    snapshot_name=parent_sid.name, interval=interval, batch_index=0
+                )
+        elif parent_sid in snapshots_to_create:
+            parent_node = CreateNode(snapshot_name=parent_sid.name)
+
+        if parent_node is not None:
+            cache[parent_sid] = {parent_node}
+            return {parent_node}
+
         # This snapshot has no intervals and doesn't need creation which means
         # that it can be a transitive dependency
-        transitive_deps: t.List[SchedulingUnit] = []
+        transitive_deps: t.Set[SchedulingUnit] = set()
         parent_snapshot = self.snapshots[parent_sid]
         for grandparent_sid in parent_snapshot.parents:
-            transitive_deps.extend(
+            transitive_deps.update(
                 self._find_upstream_dependencies(
-                    grandparent_sid, intervals_per_snapshot, snapshots_to_create
+                    grandparent_sid, intervals_per_snapshot, snapshots_to_create, cache
                 )
            )
+        cache[parent_sid] = transitive_deps
         return transitive_deps
 
     def _run_or_audit(
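
The new `cache` parameter memoizes each parent's resolved scheduling units, so a shared ancestor is traversed once rather than once per downstream path, which can otherwise blow up exponentially in diamond-shaped DAGs. The pattern in isolation (illustrative sketch, not the method itself):

    from typing import Dict, Set

    def transitive_deps(node: str, parents: Dict[str, Set[str]], cache: Dict[str, Set[str]]) -> Set[str]:
        if node in cache:
            return cache[node]          # a shared ancestor is resolved only once
        deps: Set[str] = set()
        for parent in parents.get(node, set()):
            deps.add(parent)
            deps.update(transitive_deps(parent, parents, cache))
        cache[node] = deps
        return deps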
@@ -839,7 +877,9 @@ class Scheduler:
             run_environment_statements=run_environment_statements,
             audit_only=audit_only,
             auto_restatement_triggers=auto_restatement_triggers,
-            selected_models={s.node.dbt_name for s in merged_intervals if s.node.dbt_name},
+            selected_models={
+                s.node.dbt_unique_id for s in merged_intervals if s.node.dbt_unique_id
+            },
         )
 
         return CompletionStatus.FAILURE if errors else CompletionStatus.SUCCESS
@@ -954,6 +994,7 @@ class Scheduler:
                 python_env=signals.python_env,
                 dialect=snapshot.model.dialect,
                 path=snapshot.model._path,
+                snapshot=snapshot,
                 kwargs=kwargs,
             )
         except SQLMeshError as e:
sqlmesh/core/selector.py CHANGED
@@ -3,6 +3,8 @@ from __future__ import annotations
 import fnmatch
 import typing as t
 from pathlib import Path
+from itertools import zip_longest
+import abc
 
 from sqlglot import exp
 from sqlglot.errors import ParseError
@@ -14,6 +16,7 @@ from sqlmesh.core import constants as c
 from sqlmesh.core.dialect import normalize_model_name
 from sqlmesh.core.environment import Environment
 from sqlmesh.core.model import update_model_schemas
+from sqlmesh.core.audit import StandaloneAudit
 from sqlmesh.utils import UniqueKeyDict
 from sqlmesh.utils.dag import DAG
 from sqlmesh.utils.git import GitClient
@@ -23,10 +26,11 @@ from sqlmesh.utils.errors import SQLMeshError
 if t.TYPE_CHECKING:
     from typing_extensions import Literal as Lit  # noqa
     from sqlmesh.core.model import Model
+    from sqlmesh.core.node import Node
     from sqlmesh.core.state_sync import StateReader
 
 
-class Selector:
+class Selector(abc.ABC):
     def __init__(
         self,
         state_reader: StateReader,
@@ -165,20 +169,20 @@ class Selector:
         return models
 
     def expand_model_selections(
-        self, model_selections: t.Iterable[str], models: t.Optional[t.Dict[str, Model]] = None
+        self, model_selections: t.Iterable[str], models: t.Optional[t.Dict[str, Node]] = None
     ) -> t.Set[str]:
-        """Expands a set of model selections into a set of model names.
+        """Expands a set of model selections into a set of model fqns that can be looked up in the Context.
 
         Args:
             model_selections: A set of model selections.
 
         Returns:
-            A set of model names.
+            A set of model fqns.
         """
 
         node = parse(" | ".join(f"({s})" for s in model_selections))
 
-        all_models = models or self._models
+        all_models: t.Dict[str, Node] = models or dict(self._models)
         models_by_tags: t.Dict[str, t.Set[str]] = {}
 
         for fqn, model in all_models.items():
@@ -194,10 +198,9 @@ class Selector:
                 return {
                     fqn
                     for fqn, model in all_models.items()
-                    if fnmatch.fnmatchcase(model.name, node.this)
+                    if fnmatch.fnmatchcase(self._model_name(model), node.this)
                 }
-            fqn = normalize_model_name(pattern, self._default_catalog, self._dialect)
-            return {fqn} if fqn in all_models else set()
+            return self._pattern_to_model_fqns(pattern, all_models)
         if isinstance(node, exp.And):
             return evaluate(node.left) & evaluate(node.right)
         if isinstance(node, exp.Or):
@@ -225,6 +228,13 @@ class Selector:
                         if fnmatch.fnmatchcase(tag, pattern)
                     }
                 return models_by_tags.get(pattern, set())
+            if isinstance(node, ResourceType):
+                resource_type = node.name.lower()
+                return {
+                    fqn
+                    for fqn, model in all_models.items()
+                    if self._matches_resource_type(resource_type, model)
+                }
             if isinstance(node, Direction):
                 selected = set()
 
@@ -241,6 +251,117 @@ class Selector:
 
         return evaluate(node)
 
+    @abc.abstractmethod
+    def _model_name(self, model: Node) -> str:
+        """Given a model, return the name that a selector pattern containing wildcards should be fnmatch'd on"""
+        pass
+
+    @abc.abstractmethod
+    def _pattern_to_model_fqns(self, pattern: str, all_models: t.Dict[str, Node]) -> t.Set[str]:
+        """Given a pattern, return the keys of the matching models from :all_models"""
+        pass
+
+    @abc.abstractmethod
+    def _matches_resource_type(self, resource_type: str, model: Node) -> bool:
+        """Indicate whether or not the supplied model matches the supplied resource type"""
+        pass
+
+
+class NativeSelector(Selector):
+    """Implementation of selectors that matches objects based on SQLMesh native names"""
+
+    def _model_name(self, model: Node) -> str:
+        return model.name
+
+    def _pattern_to_model_fqns(self, pattern: str, all_models: t.Dict[str, Node]) -> t.Set[str]:
+        fqn = normalize_model_name(pattern, self._default_catalog, self._dialect)
+        return {fqn} if fqn in all_models else set()
+
+    def _matches_resource_type(self, resource_type: str, model: Node) -> bool:
+        if resource_type == "model":
+            return model.is_model
+        if resource_type == "audit":
+            return isinstance(model, StandaloneAudit)
+
+        raise SQLMeshError(f"Unsupported resource type: {resource_type}")
+
+
+class DbtSelector(Selector):
+    """Implementation of selectors that matches objects based on the dbt names instead of the SQLMesh native names"""
+
+    def _model_name(self, model: Node) -> str:
+        if dbt_fqn := model.dbt_fqn:
+            return dbt_fqn
+        raise SQLMeshError("dbt node information must be populated to use dbt selectors")
+
+    def _pattern_to_model_fqns(self, pattern: str, all_models: t.Dict[str, Node]) -> t.Set[str]:
+        # a pattern like "staging.customers" should match a model called "jaffle_shop.staging.customers"
+        # but not a model called "jaffle_shop.customers.staging"
+        # also a pattern like "aging" should not match "staging" so we need to consider components; not substrings
+        pattern_components = pattern.split(".")
+        first_pattern_component = pattern_components[0]
+        matches = set()
+        for fqn, model in all_models.items():
+            if not model.dbt_fqn:
+                continue
+
+            dbt_fqn_components = model.dbt_fqn.split(".")
+            try:
+                starting_idx = dbt_fqn_components.index(first_pattern_component)
+            except ValueError:
+                continue
+            for pattern_component, fqn_component in zip_longest(
+                pattern_components, dbt_fqn_components[starting_idx:]
+            ):
+                if pattern_component and not fqn_component:
+                    # the pattern still goes but we have run out of fqn components to match; no match
+                    break
+                if fqn_component and not pattern_component:
+                    # all elements of the pattern have matched elements of the fqn; match
+                    matches.add(fqn)
+                    break
+                if pattern_component != fqn_component:
+                    # the pattern explicitly doesn't match a component; no match
+                    break
+            else:
+                # reached if there is no explicit break, indicating all components of the pattern matched all components of the fqn
+                matches.add(fqn)
+        return matches
+
+    def _matches_resource_type(self, resource_type: str, model: Node) -> bool:
+        """
+        ref: https://docs.getdbt.com/reference/node-selection/methods#resource_type
+
+        # supported by SQLMesh
+        "model"
+        "seed"
+        "source"  # external model
+        "test"  # standalone audit
+
+        # not supported by SQLMesh yet, commented out to throw an error if someone tries to use them
+        "analysis"
+        "exposure"
+        "metric"
+        "saved_query"
+        "semantic_model"
+        "snapshot"
+        "unit_test"
+        """
+        if resource_type not in ("model", "seed", "source", "test"):
+            raise SQLMeshError(f"Unsupported resource type: {resource_type}")
+
+        if isinstance(model, StandaloneAudit):
+            return resource_type == "test"
+
+        if resource_type == "model":
+            return model.is_model and not model.kind.is_external and not model.kind.is_seed
+        if resource_type == "source":
+            return model.kind.is_external
+        if resource_type == "seed":
+            return model.kind.is_seed
+
+        return False
+
 
 class SelectorDialect(Dialect):
     IDENTIFIERS_CAN_START_WITH_DIGIT = True
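
The component-matching rule in `DbtSelector._pattern_to_model_fqns` can be restated as a standalone function (illustrative; `matches` is a hypothetical helper mirroring the logic above, including matching only from the first occurrence of the pattern's leading component):

    from itertools import zip_longest

    def matches(pattern: str, dbt_fqn: str) -> bool:
        p = pattern.split(".")
        f = dbt_fqn.split(".")
        try:
            start = f.index(p[0])  # first occurrence only, as above
        except ValueError:
            return False
        for pc, fc in zip_longest(p, f[start:]):
            if pc and not fc:
                return False       # pattern outruns the fqn components
            if fc and not pc:
                return True        # entire pattern consumed; match
            if pc != fc:
                return False
        return True                # pattern matched through the final component

    # matches("staging.customers", "jaffle_shop.staging.customers") -> True
    # matches("customers.staging", "jaffle_shop.staging.customers") -> False
    # matches("aging", "jaffle_shop.staging.customers")             -> False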
@@ -271,6 +392,10 @@ class Tag(exp.Expression):
     pass
 
 
+class ResourceType(exp.Expression):
+    pass
+
+
 class Direction(exp.Expression):
     pass
 
@@ -323,7 +448,8 @@ def parse(selector: str, dialect: DialectType = None) -> exp.Expression:
     upstream = _match(TokenType.PLUS)
     downstream = None
     tag = _parse_kind("tag")
-    git = False if tag else _parse_kind("git")
+    resource_type = False if tag else _parse_kind("resource_type")
+    git = False if resource_type else _parse_kind("git")
     lstar = "*" if _match(TokenType.STAR) else ""
     directions = {}
 
@@ -349,6 +475,8 @@ def parse(selector: str, dialect: DialectType = None) -> exp.Expression:
 
     if tag:
         this = Tag(this=this)
+    if resource_type:
+        this = ResourceType(this=this)
     if git:
         this = Git(this=this)
     if directions:
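
With this grammar change, `resource_type:` prefixes parse into `ResourceType` nodes that `expand_model_selections` evaluates via `_matches_resource_type`. A usage sketch against the module's own `parse` function, combining selections the same way `expand_model_selections` does (with " | "):

    from sqlmesh.core.selector import parse

    # Yields an expression tree over Tag(...) and ResourceType(...) nodes,
    # later walked by the evaluate() closure in expand_model_selections.
    tree = parse("(tag:finance) | (resource_type:seed)")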
sqlmesh/core/signal.py CHANGED
@@ -1,7 +1,14 @@
 from __future__ import annotations
 
-
+import typing as t
 from sqlmesh.utils import UniqueKeyDict, registry_decorator
+from sqlmesh.utils.errors import MissingSourceError
+
+if t.TYPE_CHECKING:
+    from sqlmesh.core.context import ExecutionContext
+    from sqlmesh.core.snapshot.definition import Snapshot
+    from sqlmesh.utils.date import DatetimeRanges
+    from sqlmesh.core.snapshot.definition import DeployabilityIndex
 
 
 class signal(registry_decorator):
@@ -33,3 +40,59 @@ class signal(registry_decorator):
 
 
 SignalRegistry = UniqueKeyDict[str, signal]
+
+
+@signal()
+def freshness(
+    batch: DatetimeRanges,
+    snapshot: Snapshot,
+    context: ExecutionContext,
+) -> bool:
+    """
+    Implements model freshness as a signal, i.e. it considers this model to be fresh if:
+    - Any upstream SQLMesh model has available intervals to compute, i.e. is fresh
+    - Any upstream external model has been altered since the last time the model was evaluated
+    """
+    adapter = context.engine_adapter
+    if context.is_restatement or not adapter.SUPPORTS_METADATA_TABLE_LAST_MODIFIED_TS:
+        return True
+
+    deployability_index = context.deployability_index or DeployabilityIndex.all_deployable()
+
+    last_altered_ts = (
+        snapshot.last_altered_ts
+        if deployability_index.is_deployable(snapshot)
+        else snapshot.dev_last_altered_ts
+    )
+
+    if not last_altered_ts:
+        return True
+
+    parent_snapshots = {context.snapshots[p.name] for p in snapshot.parents}
+
+    upstream_parent_snapshots = {p for p in parent_snapshots if not p.is_external}
+    external_parents = snapshot.node.depends_on - {p.name for p in upstream_parent_snapshots}
+
+    if context.parent_intervals:
+        # At least one upstream sqlmesh model has intervals to compute (i.e. is fresh),
+        # so the current model is considered fresh too
+        return True
+
+    if external_parents:
+        external_last_altered_timestamps = adapter.get_table_last_modified_ts(
+            list(external_parents)
+        )
+
+        if len(external_last_altered_timestamps) != len(external_parents):
+            raise MissingSourceError(
+                f"Expected {len(external_parents)} sources to be present, but got {len(external_last_altered_timestamps)}."
+            )
+
+        # Finding new data means that the upstream dependencies have been altered
+        # since the last time the model was evaluated
+        return any(
+            external_last_altered_ts > last_altered_ts
+            for external_last_altered_ts in external_last_altered_timestamps
+        )
+
+    return False
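
Since `freshness` is registered with the same `@signal()` decorator that user projects use, a custom signal can consume the new `ExecutionContext` fields the same way. A minimal sketch (the signal name and body are hypothetical; the argument names follow the contract shown in `freshness` above):

    from sqlmesh.core.signal import signal

    @signal()
    def upstream_changed(batch, snapshot, context) -> bool:
        # Always run during restatements, mirroring freshness above.
        if context.is_restatement:
            return True
        # Otherwise run only when a non-external parent has batches to compute.
        return bool(context.parent_intervals)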