sqlmesh 0.217.1.dev1__py3-none-any.whl → 0.227.2.dev20__py3-none-any.whl
This diff compares the contents of two publicly released versions of this package, as published to a supported public registry. It is provided for informational purposes only.
- sqlmesh/__init__.py +12 -2
- sqlmesh/_version.py +2 -2
- sqlmesh/cli/project_init.py +10 -2
- sqlmesh/core/_typing.py +1 -0
- sqlmesh/core/audit/definition.py +8 -2
- sqlmesh/core/config/__init__.py +1 -1
- sqlmesh/core/config/connection.py +20 -5
- sqlmesh/core/config/dbt.py +13 -0
- sqlmesh/core/config/janitor.py +12 -0
- sqlmesh/core/config/loader.py +7 -0
- sqlmesh/core/config/model.py +2 -0
- sqlmesh/core/config/root.py +3 -0
- sqlmesh/core/console.py +80 -2
- sqlmesh/core/constants.py +1 -1
- sqlmesh/core/context.py +112 -35
- sqlmesh/core/dialect.py +3 -0
- sqlmesh/core/engine_adapter/_typing.py +2 -0
- sqlmesh/core/engine_adapter/base.py +330 -23
- sqlmesh/core/engine_adapter/base_postgres.py +17 -1
- sqlmesh/core/engine_adapter/bigquery.py +146 -7
- sqlmesh/core/engine_adapter/clickhouse.py +17 -13
- sqlmesh/core/engine_adapter/databricks.py +50 -2
- sqlmesh/core/engine_adapter/fabric.py +110 -29
- sqlmesh/core/engine_adapter/mixins.py +142 -48
- sqlmesh/core/engine_adapter/mssql.py +15 -4
- sqlmesh/core/engine_adapter/mysql.py +2 -2
- sqlmesh/core/engine_adapter/postgres.py +9 -3
- sqlmesh/core/engine_adapter/redshift.py +4 -0
- sqlmesh/core/engine_adapter/risingwave.py +1 -0
- sqlmesh/core/engine_adapter/shared.py +6 -0
- sqlmesh/core/engine_adapter/snowflake.py +82 -11
- sqlmesh/core/engine_adapter/spark.py +14 -10
- sqlmesh/core/engine_adapter/trino.py +5 -2
- sqlmesh/core/janitor.py +181 -0
- sqlmesh/core/lineage.py +1 -0
- sqlmesh/core/linter/rules/builtin.py +15 -0
- sqlmesh/core/loader.py +17 -30
- sqlmesh/core/macros.py +35 -13
- sqlmesh/core/model/common.py +2 -0
- sqlmesh/core/model/definition.py +72 -4
- sqlmesh/core/model/kind.py +66 -2
- sqlmesh/core/model/meta.py +107 -2
- sqlmesh/core/node.py +101 -2
- sqlmesh/core/plan/builder.py +15 -10
- sqlmesh/core/plan/common.py +196 -2
- sqlmesh/core/plan/definition.py +21 -6
- sqlmesh/core/plan/evaluator.py +72 -113
- sqlmesh/core/plan/explainer.py +90 -8
- sqlmesh/core/plan/stages.py +42 -21
- sqlmesh/core/renderer.py +26 -18
- sqlmesh/core/scheduler.py +60 -19
- sqlmesh/core/selector.py +137 -9
- sqlmesh/core/signal.py +64 -1
- sqlmesh/core/snapshot/__init__.py +1 -0
- sqlmesh/core/snapshot/definition.py +109 -25
- sqlmesh/core/snapshot/evaluator.py +610 -50
- sqlmesh/core/state_sync/__init__.py +0 -1
- sqlmesh/core/state_sync/base.py +31 -27
- sqlmesh/core/state_sync/cache.py +12 -4
- sqlmesh/core/state_sync/common.py +216 -111
- sqlmesh/core/state_sync/db/facade.py +30 -15
- sqlmesh/core/state_sync/db/interval.py +27 -7
- sqlmesh/core/state_sync/db/migrator.py +14 -8
- sqlmesh/core/state_sync/db/snapshot.py +119 -87
- sqlmesh/core/table_diff.py +2 -2
- sqlmesh/core/test/definition.py +14 -9
- sqlmesh/core/test/discovery.py +4 -0
- sqlmesh/dbt/adapter.py +20 -11
- sqlmesh/dbt/basemodel.py +52 -41
- sqlmesh/dbt/builtin.py +27 -11
- sqlmesh/dbt/column.py +17 -5
- sqlmesh/dbt/common.py +4 -2
- sqlmesh/dbt/context.py +14 -1
- sqlmesh/dbt/loader.py +60 -8
- sqlmesh/dbt/manifest.py +136 -8
- sqlmesh/dbt/model.py +105 -25
- sqlmesh/dbt/package.py +16 -1
- sqlmesh/dbt/profile.py +3 -3
- sqlmesh/dbt/project.py +12 -7
- sqlmesh/dbt/seed.py +1 -1
- sqlmesh/dbt/source.py +6 -1
- sqlmesh/dbt/target.py +25 -6
- sqlmesh/dbt/test.py +31 -1
- sqlmesh/integrations/github/cicd/controller.py +6 -2
- sqlmesh/lsp/context.py +4 -2
- sqlmesh/magics.py +1 -1
- sqlmesh/migrations/v0000_baseline.py +3 -6
- sqlmesh/migrations/v0061_mysql_fix_blob_text_type.py +2 -5
- sqlmesh/migrations/v0062_add_model_gateway.py +2 -2
- sqlmesh/migrations/v0063_change_signals.py +2 -4
- sqlmesh/migrations/v0064_join_when_matched_strings.py +2 -4
- sqlmesh/migrations/v0065_add_model_optimize.py +2 -2
- sqlmesh/migrations/v0066_add_auto_restatements.py +2 -6
- sqlmesh/migrations/v0067_add_tsql_date_full_precision.py +2 -2
- sqlmesh/migrations/v0068_include_unrendered_query_in_metadata_hash.py +2 -2
- sqlmesh/migrations/v0069_update_dev_table_suffix.py +2 -4
- sqlmesh/migrations/v0070_include_grains_in_metadata_hash.py +2 -2
- sqlmesh/migrations/v0071_add_dev_version_to_intervals.py +2 -6
- sqlmesh/migrations/v0072_add_environment_statements.py +2 -4
- sqlmesh/migrations/v0073_remove_symbolic_disable_restatement.py +2 -4
- sqlmesh/migrations/v0074_add_partition_by_time_column_property.py +2 -2
- sqlmesh/migrations/v0075_remove_validate_query.py +2 -4
- sqlmesh/migrations/v0076_add_cron_tz.py +2 -2
- sqlmesh/migrations/v0077_fix_column_type_hash_calculation.py +2 -2
- sqlmesh/migrations/v0078_warn_if_non_migratable_python_env.py +2 -4
- sqlmesh/migrations/v0079_add_gateway_managed_property.py +7 -9
- sqlmesh/migrations/v0080_add_batch_size_to_scd_type_2_models.py +2 -2
- sqlmesh/migrations/v0081_update_partitioned_by.py +2 -4
- sqlmesh/migrations/v0082_warn_if_incorrectly_duplicated_statements.py +2 -4
- sqlmesh/migrations/v0083_use_sql_for_scd_time_data_type_data_hash.py +2 -2
- sqlmesh/migrations/v0084_normalize_quote_when_matched_and_merge_filter.py +2 -2
- sqlmesh/migrations/v0085_deterministic_repr.py +2 -4
- sqlmesh/migrations/v0086_check_deterministic_bug.py +2 -4
- sqlmesh/migrations/v0087_normalize_blueprint_variables.py +2 -4
- sqlmesh/migrations/v0088_warn_about_variable_python_env_diffs.py +2 -4
- sqlmesh/migrations/v0089_add_virtual_environment_mode.py +2 -2
- sqlmesh/migrations/v0090_add_forward_only_column.py +2 -6
- sqlmesh/migrations/v0091_on_additive_change.py +2 -2
- sqlmesh/migrations/v0092_warn_about_dbt_data_type_diff.py +2 -4
- sqlmesh/migrations/v0093_use_raw_sql_in_fingerprint.py +2 -2
- sqlmesh/migrations/v0094_add_dev_version_and_fingerprint_columns.py +2 -6
- sqlmesh/migrations/v0095_warn_about_dbt_raw_sql_diff.py +2 -4
- sqlmesh/migrations/v0096_remove_plan_dags_table.py +2 -4
- sqlmesh/migrations/v0097_add_dbt_name_in_node.py +2 -2
- sqlmesh/migrations/v0098_add_dbt_node_info_in_node.py +103 -0
- sqlmesh/migrations/v0099_add_last_altered_to_intervals.py +25 -0
- sqlmesh/migrations/v0100_add_grants_and_grants_target_layer.py +9 -0
- sqlmesh/utils/__init__.py +8 -1
- sqlmesh/utils/cache.py +5 -1
- sqlmesh/utils/date.py +1 -1
- sqlmesh/utils/errors.py +4 -0
- sqlmesh/utils/git.py +3 -1
- sqlmesh/utils/jinja.py +25 -2
- sqlmesh/utils/pydantic.py +6 -6
- sqlmesh/utils/windows.py +13 -3
- {sqlmesh-0.217.1.dev1.dist-info → sqlmesh-0.227.2.dev20.dist-info}/METADATA +5 -5
- {sqlmesh-0.217.1.dev1.dist-info → sqlmesh-0.227.2.dev20.dist-info}/RECORD +188 -183
- sqlmesh_dbt/cli.py +70 -7
- sqlmesh_dbt/console.py +14 -6
- sqlmesh_dbt/operations.py +103 -24
- sqlmesh_dbt/selectors.py +39 -1
- web/client/dist/assets/{Audits-Ucsx1GzF.js → Audits-CBiYyyx-.js} +1 -1
- web/client/dist/assets/{Banner-BWDzvavM.js → Banner-DSRbUlO5.js} +1 -1
- web/client/dist/assets/{ChevronDownIcon-D2VL13Ah.js → ChevronDownIcon-MK_nrjD_.js} +1 -1
- web/client/dist/assets/{ChevronRightIcon-DWGYbf1l.js → ChevronRightIcon-CLWtT22Q.js} +1 -1
- web/client/dist/assets/{Content-DdHDZM3I.js → Content-BNuGZN5l.js} +1 -1
- web/client/dist/assets/{Content-Bikfy8fh.js → Content-CSHJyW0n.js} +1 -1
- web/client/dist/assets/{Data-CzAJH7rW.js → Data-C1oRDbLx.js} +1 -1
- web/client/dist/assets/{DataCatalog-BJF11g8f.js → DataCatalog-HXyX2-_j.js} +1 -1
- web/client/dist/assets/{Editor-s0SBpV2y.js → Editor-BDyfpUuw.js} +1 -1
- web/client/dist/assets/{Editor-DgLhgKnm.js → Editor-D0jNItwC.js} +1 -1
- web/client/dist/assets/{Errors-D0m0O1d3.js → Errors-BfuFLcPi.js} +1 -1
- web/client/dist/assets/{FileExplorer-CEv0vXkt.js → FileExplorer-BR9IE3he.js} +1 -1
- web/client/dist/assets/{Footer-BwzXn8Ew.js → Footer-CgBEtiAh.js} +1 -1
- web/client/dist/assets/{Header-6heDkEqG.js → Header-DSqR6nSO.js} +1 -1
- web/client/dist/assets/{Input-obuJsD6k.js → Input-B-oZ6fGO.js} +1 -1
- web/client/dist/assets/Lineage-DYQVwDbD.js +1 -0
- web/client/dist/assets/{ListboxShow-HM9_qyrt.js → ListboxShow-BE5-xevs.js} +1 -1
- web/client/dist/assets/{ModelLineage-zWdKo0U2.js → ModelLineage-DkIFAYo4.js} +1 -1
- web/client/dist/assets/{Models-Bcu66SRz.js → Models-D5dWr8RB.js} +1 -1
- web/client/dist/assets/{Page-BWEEQfIt.js → Page-C-XfU5BR.js} +1 -1
- web/client/dist/assets/{Plan-C4gXCqlf.js → Plan-ZEuTINBq.js} +1 -1
- web/client/dist/assets/{PlusCircleIcon-CVDO651q.js → PlusCircleIcon-DVXAHG8_.js} +1 -1
- web/client/dist/assets/{ReportErrors-BT6xFwAr.js → ReportErrors-B7FEPzMB.js} +1 -1
- web/client/dist/assets/{Root-ryJoBK4h.js → Root-8aZyhPxF.js} +1 -1
- web/client/dist/assets/{SearchList-DB04sPb9.js → SearchList-W_iT2G82.js} +1 -1
- web/client/dist/assets/{SelectEnvironment-CUYcXUu6.js → SelectEnvironment-C65jALmO.js} +1 -1
- web/client/dist/assets/{SourceList-Doo_9ZGp.js → SourceList-DSLO6nVJ.js} +1 -1
- web/client/dist/assets/{SourceListItem-D5Mj7Dly.js → SourceListItem-BHt8d9-I.js} +1 -1
- web/client/dist/assets/{SplitPane-qHmkD1qy.js → SplitPane-CViaZmw6.js} +1 -1
- web/client/dist/assets/{Tests-DH1Z74ML.js → Tests-DhaVt5t1.js} +1 -1
- web/client/dist/assets/{Welcome-DqUJUNMF.js → Welcome-DvpjH-_4.js} +1 -1
- web/client/dist/assets/context-BctCsyGb.js +71 -0
- web/client/dist/assets/{context-Dr54UHLi.js → context-DFNeGsFF.js} +1 -1
- web/client/dist/assets/{editor-DYIP1yQ4.js → editor-CcO28cqd.js} +1 -1
- web/client/dist/assets/{file-DarlIDVi.js → file-CvJN3aZO.js} +1 -1
- web/client/dist/assets/{floating-ui.react-dom-BH3TFvkM.js → floating-ui.react-dom-CjE-JNW1.js} +1 -1
- web/client/dist/assets/{help-Bl8wqaQc.js → help-DuPhjipa.js} +1 -1
- web/client/dist/assets/{index-D1sR7wpN.js → index-C-dJH7yZ.js} +1 -1
- web/client/dist/assets/{index-O3mjYpnE.js → index-Dj0i1-CA.js} +2 -2
- web/client/dist/assets/{plan-CehRrJUG.js → plan-BTRSbjKn.js} +1 -1
- web/client/dist/assets/{popover-CqgMRE0G.js → popover-_Sf0yvOI.js} +1 -1
- web/client/dist/assets/{project-6gxepOhm.js → project-BvSOI8MY.js} +1 -1
- web/client/dist/index.html +1 -1
- web/client/dist/assets/Lineage-D0Hgdz2v.js +0 -1
- web/client/dist/assets/context-DgX0fp2E.js +0 -68
- {sqlmesh-0.217.1.dev1.dist-info → sqlmesh-0.227.2.dev20.dist-info}/WHEEL +0 -0
- {sqlmesh-0.217.1.dev1.dist-info → sqlmesh-0.227.2.dev20.dist-info}/entry_points.txt +0 -0
- {sqlmesh-0.217.1.dev1.dist-info → sqlmesh-0.227.2.dev20.dist-info}/licenses/LICENSE +0 -0
- {sqlmesh-0.217.1.dev1.dist-info → sqlmesh-0.227.2.dev20.dist-info}/top_level.txt +0 -0
sqlmesh/core/plan/stages.py
CHANGED

@@ -12,8 +12,9 @@ from sqlmesh.core.snapshot.definition import (
     Snapshot,
     SnapshotTableInfo,
     SnapshotId,
-    …
+    snapshots_to_dag,
 )
+from sqlmesh.utils.errors import PlanError


 @dataclass
@@ -98,14 +99,19 @@ class AuditOnlyRunStage:

 @dataclass
 class RestatementStage:
-    """
+    """Clear intervals from state for snapshots in *other* environments, when restatements are requested in prod.
+
+    This stage is effectively a "marker" stage to trigger the plan evaluator to perform the "clear intervals" logic after the BackfillStage has completed.
+    The "clear intervals" logic is executed just-in-time using the latest state available in order to pick up new snapshots that may have
+    been created while the BackfillStage was running, which is why we do not build a list of snapshots to clear at plan time and defer to evaluation time.
+
+    Note that this stage is only present on `prod` plans because dev plans do not need to worry about clearing intervals in other environments.

     Args:
-        …
-        …
+        all_snapshots: All snapshots in the plan by name. Note that this does not include the snapshots from other environments that will get their
+            intervals cleared, it's included here as an optimization to prevent having to re-fetch the current plan's snapshots
     """

-    snapshot_intervals: t.Dict[SnapshotTableInfo, Interval]
     all_snapshots: t.Dict[str, Snapshot]


@@ -244,6 +250,7 @@ class PlanStagesBuilder:
         stored_snapshots = self.state_reader.get_snapshots(plan.environment.snapshots)
         snapshots = {**new_snapshots, **stored_snapshots}
         snapshots_by_name = {s.name: s for s in snapshots.values()}
+        dag = snapshots_to_dag(snapshots.values())

         all_selected_for_backfill_snapshots = {
             s.snapshot_id for s in snapshots.values() if plan.is_selected_for_backfill(s.name)
@@ -261,14 +268,21 @@
         before_promote_snapshots = {
             s.snapshot_id
             for s in snapshots.values()
-            if deployability_index.is_representative(s)
+            if (deployability_index.is_representative(s) or s.is_seed)
             and plan.is_selected_for_backfill(s.name)
         }
         after_promote_snapshots = all_selected_for_backfill_snapshots - before_promote_snapshots
         deployability_index = DeployabilityIndex.all_deployable()

+        snapshot_ids_with_schema_migration = [
+            s.snapshot_id for s in snapshots.values() if s.requires_schema_migration_in_prod
+        ]
+        # Include all upstream dependencies of snapshots that require schema migration to make sure
+        # the upstream tables are created before the schema updates are applied
         snapshots_with_schema_migration = [
-            …
+            snapshots[s_id]
+            for s_id in dag.subdag(*snapshot_ids_with_schema_migration)
+            if snapshots[s_id].supports_schema_migration_in_prod
         ]

         snapshots_to_intervals = self._missing_intervals(
@@ -321,10 +335,6 @@
         if audit_only_snapshots:
             stages.append(AuditOnlyRunStage(snapshots=list(audit_only_snapshots.values())))

-        restatement_stage = self._get_restatement_stage(plan, snapshots_by_name)
-        if restatement_stage:
-            stages.append(restatement_stage)
-
         if missing_intervals_before_promote:
             stages.append(
                 BackfillStage(
@@ -349,6 +359,15 @@
                 )
             )

+        # note: "restatement stage" (which is clearing intervals in state - not actually performing the restatements, that's the backfill stage)
+        # needs to come *after* the backfill stage so that at no time do other plans / runs see empty prod intervals and compete with this plan to try to fill them.
+        # in addition, when we update intervals in state, we only clear intervals from dev snapshots to force dev models to be backfilled based on the new prod data.
+        # we can leave prod intervals alone because by the time this plan finishes, the intervals in state have not actually changed, since restatement replaces
+        # data for existing intervals and does not produce new ones
+        restatement_stage = self._get_restatement_stage(plan, snapshots_by_name)
+        if restatement_stage:
+            stages.append(restatement_stage)
+
         stages.append(
             EnvironmentRecordUpdateStage(
                 no_gaps_snapshot_names={s.name for s in before_promote_snapshots}
@@ -443,16 +462,18 @@
     def _get_restatement_stage(
         self, plan: EvaluatablePlan, snapshots_by_name: t.Dict[str, Snapshot]
     ) -> t.Optional[RestatementStage]:
-        …
+        if plan.restate_all_snapshots:
+            if plan.is_dev:
+                raise PlanError(
+                    "Clearing intervals from state across dev model versions is only valid for prod plans"
+                )
+
+        if plan.restatements:
+            return RestatementStage(
+                all_snapshots=snapshots_by_name,
+            )
+
+        return None

     def _get_physical_layer_update_stage(
         self,
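The schema-migration change above hinges on one graph property: dag.subdag(*snapshot_ids_with_schema_migration) pulls in not just the listed snapshots but all of their upstream dependencies, so upstream tables exist before the schema updates are applied. A minimal standalone sketch of that upstream-closure computation, using plain dicts and a hypothetical upstream_closure helper rather than sqlmesh's actual DAG class:

    import typing as t

    def upstream_closure(parents: t.Dict[str, t.Set[str]], roots: t.Set[str]) -> t.Set[str]:
        # Return `roots` plus every transitive upstream dependency.
        seen: t.Set[str] = set()
        stack = list(roots)
        while stack:
            node = stack.pop()
            if node in seen:
                continue
            seen.add(node)
            stack.extend(parents.get(node, set()))
        return seen

    # "raw" feeds "staging", which feeds "mart"; only "mart" needs a schema
    # migration, but the closure also yields the tables it depends on.
    parents = {"mart": {"staging"}, "staging": {"raw"}, "raw": set()}
    assert upstream_closure(parents, {"mart"}) == {"mart", "staging", "raw"}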
sqlmesh/core/renderer.py
CHANGED

@@ -6,7 +6,7 @@ from contextlib import contextmanager
 from functools import partial
 from pathlib import Path

-from sqlglot import exp
+from sqlglot import exp, Dialect
 from sqlglot.errors import SqlglotError
 from sqlglot.helper import ensure_list
 from sqlglot.optimizer.annotate_types import annotate_types
@@ -196,7 +196,14 @@ class BaseExpressionRenderer:
             **kwargs,
         }

+        if this_model:
+            render_kwargs["this_model"] = this_model
+
+        macro_evaluator.locals.update(render_kwargs)
+
         variables = kwargs.pop("variables", {})
+        if variables:
+            macro_evaluator.locals.setdefault(c.SQLMESH_VARS, {}).update(variables)

         expressions = [self._expression]
         if isinstance(self._expression, d.Jinja):
@@ -249,23 +256,24 @@
             ) from ex

         if rendered_expression.strip():
-            …
+            # ensure there is actual SQL and not just comments and non-SQL jinja
+            dialect = Dialect.get_or_raise(self._dialect)
+            tokens = dialect.tokenize(rendered_expression)
+
+            if tokens:
+                try:
+                    expressions = [
+                        e for e in dialect.parser().parse(tokens, rendered_expression) if e
+                    ]
+
+                    if not expressions:
+                        raise ConfigError(
+                            f"Failed to parse an expression:\n{rendered_expression}"
+                        )
+                except Exception as ex:
+                    raise ConfigError(
+                        f"Could not parse the rendered jinja at '{self._path}'.\n{ex}"
+                    ) from ex

         for definition in self._macro_definitions:
             try:
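The new parsing guard tokenizes the rendered jinja before parsing: a template that renders to nothing but comments produces no SQL tokens and is skipped rather than parsed into an empty statement. A minimal sketch of the same check against sqlglot's public Dialect API; the duckdb dialect and the sample strings are arbitrary choices for illustration:

    from sqlglot import Dialect

    dialect = Dialect.get_or_raise("duckdb")

    for rendered in ("-- only a comment", "SELECT 1 AS x"):
        tokens = dialect.tokenize(rendered)
        if not tokens:
            # comment-only output: nothing to parse
            print(f"skipped (no SQL): {rendered!r}")
            continue
        expressions = [e for e in dialect.parser().parse(tokens, rendered) if e]
        print(f"parsed {len(expressions)} expression(s) from {rendered!r}")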
sqlmesh/core/scheduler.py
CHANGED

@@ -251,7 +251,9 @@ class Scheduler:
             **kwargs,
         )

-        self.state_sync.add_interval(snapshot, start, end, is_dev=not is_deployable)
+        self.state_sync.add_interval(
+            snapshot, start, end, is_dev=not is_deployable, last_altered_ts=now_timestamp()
+        )
         return audit_results

     def run(
@@ -335,6 +337,7 @@
         deployability_index: t.Optional[DeployabilityIndex],
         environment_naming_info: EnvironmentNamingInfo,
         dag: t.Optional[DAG[SnapshotId]] = None,
+        is_restatement: bool = False,
     ) -> t.Dict[Snapshot, Intervals]:
         dag = dag or snapshots_to_dag(merged_intervals)

@@ -349,7 +352,7 @@
             )
             for snapshot, intervals in merged_intervals.items()
         }
-        snapshot_batches = {}
+        snapshot_batches: t.Dict[Snapshot, Intervals] = {}
         all_unready_intervals: t.Dict[str, set[Interval]] = {}
         for snapshot_id in dag:
             if snapshot_id not in snapshot_intervals:
@@ -361,12 +364,22 @@

             adapter = self.snapshot_evaluator.get_adapter(snapshot.model_gateway)

+            parent_intervals: Intervals = []
+            for parent_id in snapshot.parents:
+                parent_snapshot, _ = snapshot_intervals.get(parent_id, (None, []))
+                if not parent_snapshot or parent_snapshot.is_external:
+                    continue
+
+                parent_intervals.extend(snapshot_batches[parent_snapshot])
+
             context = ExecutionContext(
                 adapter,
                 self.snapshots_by_name,
                 deployability_index,
                 default_dialect=adapter.dialect,
                 default_catalog=self.default_catalog,
+                is_restatement=is_restatement,
+                parent_intervals=parent_intervals,
             )

             intervals = self._check_ready_intervals(
@@ -422,6 +435,7 @@
         run_environment_statements: bool = False,
         audit_only: bool = False,
         auto_restatement_triggers: t.Dict[SnapshotId, t.List[SnapshotId]] = {},
+        is_restatement: bool = False,
     ) -> t.Tuple[t.List[NodeExecutionFailedError[SchedulingUnit]], t.List[SchedulingUnit]]:
         """Runs precomputed batches of missing intervals.

@@ -455,9 +469,12 @@
         snapshot_dag = full_dag.subdag(*selected_snapshot_ids_set)

         batched_intervals = self.batch_intervals(
-            merged_intervals, …
+            merged_intervals,
+            deployability_index,
+            environment_naming_info,
+            dag=snapshot_dag,
+            is_restatement=is_restatement,
         )
-
         self.console.start_evaluation_progress(
             batched_intervals,
             environment_naming_info,
@@ -530,6 +547,10 @@
                     execution_time=execution_time,
                 )
             else:
+                # If batch_index > 0, then the target table must exist since the first batch would have created it
+                target_table_exists = (
+                    snapshot.snapshot_id not in snapshots_to_create or node.batch_index > 0
+                )
                 audit_results = self.evaluate(
                     snapshot=snapshot,
                     environment_naming_info=environment_naming_info,
@@ -540,7 +561,7 @@
                     batch_index=node.batch_index,
                     allow_destructive_snapshots=allow_destructive_snapshots,
                     allow_additive_snapshots=allow_additive_snapshots,
-                    target_table_exists=…,
+                    target_table_exists=target_table_exists,
                     selected_models=selected_models,
                 )

@@ -638,6 +659,7 @@
         }
         snapshots_to_create = snapshots_to_create or set()
         original_snapshots_to_create = snapshots_to_create.copy()
+        upstream_dependencies_cache: t.Dict[SnapshotId, t.Set[SchedulingUnit]] = {}

         snapshot_dag = snapshot_dag or snapshots_to_dag(batches)
         dag = DAG[SchedulingUnit]()
@@ -649,12 +671,15 @@
             snapshot = self.snapshots_by_name[snapshot_id.name]
             intervals = intervals_per_snapshot.get(snapshot.name, [])

-            upstream_dependencies: t.…
+            upstream_dependencies: t.Set[SchedulingUnit] = set()

             for p_sid in snapshot.parents:
-                upstream_dependencies.…
+                upstream_dependencies.update(
                     self._find_upstream_dependencies(
-                        p_sid, …
+                        p_sid,
+                        intervals_per_snapshot,
+                        original_snapshots_to_create,
+                        upstream_dependencies_cache,
                     )
                 )

@@ -705,29 +730,42 @@
         parent_sid: SnapshotId,
         intervals_per_snapshot: t.Dict[str, Intervals],
         snapshots_to_create: t.Set[SnapshotId],
-        …
+        cache: t.Dict[SnapshotId, t.Set[SchedulingUnit]],
+    ) -> t.Set[SchedulingUnit]:
         if parent_sid not in self.snapshots:
-            return
+            return set()
+        if parent_sid in cache:
+            return cache[parent_sid]

         p_intervals = intervals_per_snapshot.get(parent_sid.name, [])

+        parent_node: t.Optional[SchedulingUnit] = None
         if p_intervals:
             if len(p_intervals) > 1:
-                …
+                parent_node = DummyNode(snapshot_name=parent_sid.name)
+            else:
+                interval = p_intervals[0]
+                parent_node = EvaluateNode(
+                    snapshot_name=parent_sid.name, interval=interval, batch_index=0
+                )
+        elif parent_sid in snapshots_to_create:
+            parent_node = CreateNode(snapshot_name=parent_sid.name)
+
+        if parent_node is not None:
+            cache[parent_sid] = {parent_node}
+            return {parent_node}
+
         # This snapshot has no intervals and doesn't need creation which means
         # that it can be a transitive dependency
-        transitive_deps: t.…
+        transitive_deps: t.Set[SchedulingUnit] = set()
         parent_snapshot = self.snapshots[parent_sid]
         for grandparent_sid in parent_snapshot.parents:
-            transitive_deps.…
+            transitive_deps.update(
                 self._find_upstream_dependencies(
-                    grandparent_sid, intervals_per_snapshot, snapshots_to_create
+                    grandparent_sid, intervals_per_snapshot, snapshots_to_create, cache
                 )
             )
+        cache[parent_sid] = transitive_deps
         return transitive_deps

     def _run_or_audit(
@@ -839,7 +877,9 @@
             run_environment_statements=run_environment_statements,
             audit_only=audit_only,
             auto_restatement_triggers=auto_restatement_triggers,
-            selected_models={…},
+            selected_models={
+                s.node.dbt_unique_id for s in merged_intervals if s.node.dbt_unique_id
+            },
         )

         return CompletionStatus.FAILURE if errors else CompletionStatus.SUCCESS
@@ -954,6 +994,7 @@
                 python_env=signals.python_env,
                 dialect=snapshot.model.dialect,
                 path=snapshot.model._path,
+                snapshot=snapshot,
                 kwargs=kwargs,
             )
         except SQLMeshError as e:
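The upstream_dependencies_cache threaded through _find_upstream_dependencies memoizes the resolved dependency set per snapshot id, so a shared ancestor in a diamond-shaped dependency graph is resolved once rather than once per path. A standalone sketch of the same memoized walk, with plain strings standing in for snapshot ids and a has_work set standing in for "has intervals or needs creation" (hypothetical names, not sqlmesh types):

    import typing as t

    def find_deps(
        node: str,
        parents: t.Dict[str, t.Set[str]],
        has_work: t.Set[str],
        cache: t.Dict[str, t.Set[str]],
    ) -> t.Set[str]:
        if node in cache:
            return cache[node]
        if node in has_work:
            # this node is itself a scheduling unit
            cache[node] = {node}
        else:
            # transparent node: fall through to its own parents
            deps: t.Set[str] = set()
            for parent in parents.get(node, set()):
                deps.update(find_deps(parent, parents, has_work, cache))
            cache[node] = deps
        return cache[node]

    # "d" depends on "a" through both "b" and "c"; with the cache, "a" is resolved once.
    parents = {"d": {"b", "c"}, "b": {"a"}, "c": {"a"}, "a": set()}
    cache: t.Dict[str, t.Set[str]] = {}
    assert find_deps("d", parents, {"a"}, cache) == {"a"}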
sqlmesh/core/selector.py
CHANGED

@@ -3,6 +3,8 @@ from __future__ import annotations
 import fnmatch
 import typing as t
 from pathlib import Path
+from itertools import zip_longest
+import abc

 from sqlglot import exp
 from sqlglot.errors import ParseError
@@ -14,6 +16,7 @@ from sqlmesh.core import constants as c
 from sqlmesh.core.dialect import normalize_model_name
 from sqlmesh.core.environment import Environment
 from sqlmesh.core.model import update_model_schemas
+from sqlmesh.core.audit import StandaloneAudit
 from sqlmesh.utils import UniqueKeyDict
 from sqlmesh.utils.dag import DAG
 from sqlmesh.utils.git import GitClient
@@ -23,10 +26,11 @@ from sqlmesh.utils.errors import SQLMeshError
 if t.TYPE_CHECKING:
     from typing_extensions import Literal as Lit  # noqa
     from sqlmesh.core.model import Model
+    from sqlmesh.core.node import Node
     from sqlmesh.core.state_sync import StateReader


-class Selector:
+class Selector(abc.ABC):
     def __init__(
         self,
         state_reader: StateReader,
@@ -165,20 +169,20 @@
         return models

     def expand_model_selections(
-        self, model_selections: t.Iterable[str], models: t.Optional[t.Dict[str, Model]] = None
+        self, model_selections: t.Iterable[str], models: t.Optional[t.Dict[str, Node]] = None
     ) -> t.Set[str]:
-        """Expands a set of model selections into a set of model names.
+        """Expands a set of model selections into a set of model fqns that can be looked up in the Context.

         Args:
             model_selections: A set of model selections.

         Returns:
-            A set of model names.
+            A set of model fqns.
         """

         node = parse(" | ".join(f"({s})" for s in model_selections))

-        all_models = models or self._models
+        all_models: t.Dict[str, Node] = models or dict(self._models)
         models_by_tags: t.Dict[str, t.Set[str]] = {}

         for fqn, model in all_models.items():
@@ -194,10 +198,9 @@
                 return {
                     fqn
                     for fqn, model in all_models.items()
-                    if fnmatch.fnmatchcase(model.name, node.this)
+                    if fnmatch.fnmatchcase(self._model_name(model), node.this)
                 }
-
-            return {fqn} if fqn in all_models else set()
+            return self._pattern_to_model_fqns(pattern, all_models)
         if isinstance(node, exp.And):
             return evaluate(node.left) & evaluate(node.right)
         if isinstance(node, exp.Or):
@@ -225,6 +228,13 @@
                     if fnmatch.fnmatchcase(tag, pattern)
                 }
             return models_by_tags.get(pattern, set())
+        if isinstance(node, ResourceType):
+            resource_type = node.name.lower()
+            return {
+                fqn
+                for fqn, model in all_models.items()
+                if self._matches_resource_type(resource_type, model)
+            }
         if isinstance(node, Direction):
             selected = set()

@@ -241,6 +251,117 @@

         return evaluate(node)

+    @abc.abstractmethod
+    def _model_name(self, model: Node) -> str:
+        """Given a model, return the name that a selector pattern containing wildcards should be fnmatch'd on"""
+        pass
+
+    @abc.abstractmethod
+    def _pattern_to_model_fqns(self, pattern: str, all_models: t.Dict[str, Node]) -> t.Set[str]:
+        """Given a pattern, return the keys of the matching models from :all_models"""
+        pass
+
+    @abc.abstractmethod
+    def _matches_resource_type(self, resource_type: str, model: Node) -> bool:
+        """Indicate whether or not the supplied model matches the supplied resource type"""
+        pass
+
+
+class NativeSelector(Selector):
+    """Implementation of selectors that matches objects based on SQLMesh native names"""
+
+    def _model_name(self, model: Node) -> str:
+        return model.name
+
+    def _pattern_to_model_fqns(self, pattern: str, all_models: t.Dict[str, Node]) -> t.Set[str]:
+        fqn = normalize_model_name(pattern, self._default_catalog, self._dialect)
+        return {fqn} if fqn in all_models else set()
+
+    def _matches_resource_type(self, resource_type: str, model: Node) -> bool:
+        if resource_type == "model":
+            return model.is_model
+        if resource_type == "audit":
+            return isinstance(model, StandaloneAudit)
+
+        raise SQLMeshError(f"Unsupported resource type: {resource_type}")
+
+
+class DbtSelector(Selector):
+    """Implementation of selectors that matches objects based on the dbt names instead of the SQLMesh native names"""
+
+    def _model_name(self, model: Node) -> str:
+        if dbt_fqn := model.dbt_fqn:
+            return dbt_fqn
+        raise SQLMeshError("dbt node information must be populated to use dbt selectors")
+
+    def _pattern_to_model_fqns(self, pattern: str, all_models: t.Dict[str, Node]) -> t.Set[str]:
+        # a pattern like "staging.customers" should match a model called "jaffle_shop.staging.customers"
+        # but not a model called "jaffle_shop.customers.staging"
+        # also a pattern like "aging" should not match "staging" so we need to consider components; not substrings
+        pattern_components = pattern.split(".")
+        first_pattern_component = pattern_components[0]
+        matches = set()
+        for fqn, model in all_models.items():
+            if not model.dbt_fqn:
+                continue
+
+            dbt_fqn_components = model.dbt_fqn.split(".")
+            try:
+                starting_idx = dbt_fqn_components.index(first_pattern_component)
+            except ValueError:
+                continue
+            for pattern_component, fqn_component in zip_longest(
+                pattern_components, dbt_fqn_components[starting_idx:]
+            ):
+                if pattern_component and not fqn_component:
+                    # the pattern still has components left but we have run out of fqn components to match; no match
+                    break
+                if fqn_component and not pattern_component:
+                    # all elements of the pattern have matched elements of the fqn; match
+                    matches.add(fqn)
+                    break
+                if pattern_component != fqn_component:
+                    # the pattern explicitly doesn't match a component; no match
+                    break
+            else:
+                # reached if there is no explicit break, indicating all components of the pattern matched all components of the fqn
+                matches.add(fqn)
+        return matches
+
+    def _matches_resource_type(self, resource_type: str, model: Node) -> bool:
+        """
+        ref: https://docs.getdbt.com/reference/node-selection/methods#resource_type
+
+        # supported by SQLMesh
+        "model"
+        "seed"
+        "source"  # external model
+        "test"  # standalone audit
+
+        # not supported by SQLMesh yet, commented out to throw an error if someone tries to use them
+        "analysis"
+        "exposure"
+        "metric"
+        "saved_query"
+        "semantic_model"
+        "snapshot"
+        "unit_test"
+        """
+        if resource_type not in ("model", "seed", "source", "test"):
+            raise SQLMeshError(f"Unsupported resource type: {resource_type}")

+        if isinstance(model, StandaloneAudit):
+            return resource_type == "test"
+
+        if resource_type == "model":
+            return model.is_model and not model.kind.is_external and not model.kind.is_seed
+        if resource_type == "source":
+            return model.kind.is_external
+        if resource_type == "seed":
+            return model.kind.is_seed
+
+        return False
+

 class SelectorDialect(Dialect):
     IDENTIFIERS_CAN_START_WITH_DIGIT = True
@@ -271,6 +392,10 @@ class Tag(exp.Expression):
     pass


+class ResourceType(exp.Expression):
+    pass
+
+
 class Direction(exp.Expression):
     pass

@@ -323,7 +448,8 @@ def parse(selector: str, dialect: DialectType = None) -> exp.Expression:
     upstream = _match(TokenType.PLUS)
     downstream = None
     tag = _parse_kind("tag")
-    git = False if tag else _parse_kind("git")
+    resource_type = False if tag else _parse_kind("resource_type")
+    git = False if resource_type else _parse_kind("git")
     lstar = "*" if _match(TokenType.STAR) else ""
     directions = {}

@@ -349,6 +475,8 @@

     if tag:
         this = Tag(this=this)
+    if resource_type:
+        this = ResourceType(this=this)
     if git:
         this = Git(this=this)
     if directions:
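DbtSelector._pattern_to_model_fqns compares whole, dot-separated fqn components rather than substrings: the walk starts at the first fqn component equal to the first pattern component, and then every remaining pattern component must line up. The same matching logic extracted into a standalone predicate for illustration:

    from itertools import zip_longest

    def matches(pattern: str, dbt_fqn: str) -> bool:
        pattern_components = pattern.split(".")
        fqn_components = dbt_fqn.split(".")
        try:
            start = fqn_components.index(pattern_components[0])
        except ValueError:
            return False
        for p, f in zip_longest(pattern_components, fqn_components[start:]):
            if p and not f:
                return False  # pattern is longer than the remaining fqn
            if f and not p:
                return True  # every pattern component matched
            if p != f:
                return False  # a component differs
        return True  # pattern and remaining fqn matched exactly

    assert matches("staging.customers", "jaffle_shop.staging.customers")
    assert not matches("staging.customers", "jaffle_shop.customers.staging")
    assert not matches("aging", "jaffle_shop.staging.customers")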
sqlmesh/core/signal.py
CHANGED

@@ -1,7 +1,14 @@
 from __future__ import annotations

-…
+import typing as t
 from sqlmesh.utils import UniqueKeyDict, registry_decorator
+from sqlmesh.utils.errors import MissingSourceError
+
+if t.TYPE_CHECKING:
+    from sqlmesh.core.context import ExecutionContext
+    from sqlmesh.core.snapshot.definition import Snapshot
+    from sqlmesh.utils.date import DatetimeRanges
+    from sqlmesh.core.snapshot.definition import DeployabilityIndex


 class signal(registry_decorator):
@@ -33,3 +40,59 @@ class signal(registry_decorator):


 SignalRegistry = UniqueKeyDict[str, signal]
+
+
+@signal()
+def freshness(
+    batch: DatetimeRanges,
+    snapshot: Snapshot,
+    context: ExecutionContext,
+) -> bool:
+    """
+    Implements model freshness as a signal, i.e. it considers this model to be fresh if:
+    - Any upstream SQLMesh model has available intervals to compute, i.e. is fresh
+    - Any upstream external model has been altered since the last time the model was evaluated
+    """
+    adapter = context.engine_adapter
+    if context.is_restatement or not adapter.SUPPORTS_METADATA_TABLE_LAST_MODIFIED_TS:
+        return True
+
+    deployability_index = context.deployability_index or DeployabilityIndex.all_deployable()
+
+    last_altered_ts = (
+        snapshot.last_altered_ts
+        if deployability_index.is_deployable(snapshot)
+        else snapshot.dev_last_altered_ts
+    )
+
+    if not last_altered_ts:
+        return True
+
+    parent_snapshots = {context.snapshots[p.name] for p in snapshot.parents}
+
+    upstream_parent_snapshots = {p for p in parent_snapshots if not p.is_external}
+    external_parents = snapshot.node.depends_on - {p.name for p in upstream_parent_snapshots}
+
+    if context.parent_intervals:
+        # At least one upstream sqlmesh model has intervals to compute (i.e. is fresh),
+        # so the current model is considered fresh too
+        return True
+
+    if external_parents:
+        external_last_altered_timestamps = adapter.get_table_last_modified_ts(
+            list(external_parents)
+        )
+
+        if len(external_last_altered_timestamps) != len(external_parents):
+            raise MissingSourceError(
+                f"Expected {len(external_parents)} sources to be present, but got {len(external_last_altered_timestamps)}."
+            )
+
+        # Finding new data means that the upstream dependencies have been altered
+        # since the last time the model was evaluated
+        return any(
+            external_last_altered_ts > last_altered_ts
+            for external_last_altered_ts in external_last_altered_timestamps
+        )
+
+    return False