sqlmesh 0.217.1.dev1__py3-none-any.whl → 0.227.2.dev4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlmesh/__init__.py +12 -2
- sqlmesh/_version.py +2 -2
- sqlmesh/cli/project_init.py +10 -2
- sqlmesh/core/_typing.py +1 -0
- sqlmesh/core/audit/definition.py +8 -2
- sqlmesh/core/config/__init__.py +1 -1
- sqlmesh/core/config/connection.py +17 -5
- sqlmesh/core/config/dbt.py +13 -0
- sqlmesh/core/config/janitor.py +12 -0
- sqlmesh/core/config/loader.py +7 -0
- sqlmesh/core/config/model.py +2 -0
- sqlmesh/core/config/root.py +3 -0
- sqlmesh/core/console.py +80 -2
- sqlmesh/core/constants.py +1 -1
- sqlmesh/core/context.py +61 -25
- sqlmesh/core/dialect.py +3 -0
- sqlmesh/core/engine_adapter/_typing.py +2 -0
- sqlmesh/core/engine_adapter/base.py +322 -22
- sqlmesh/core/engine_adapter/base_postgres.py +17 -1
- sqlmesh/core/engine_adapter/bigquery.py +146 -7
- sqlmesh/core/engine_adapter/clickhouse.py +17 -13
- sqlmesh/core/engine_adapter/databricks.py +33 -2
- sqlmesh/core/engine_adapter/fabric.py +1 -29
- sqlmesh/core/engine_adapter/mixins.py +142 -48
- sqlmesh/core/engine_adapter/mssql.py +15 -4
- sqlmesh/core/engine_adapter/mysql.py +2 -2
- sqlmesh/core/engine_adapter/postgres.py +9 -3
- sqlmesh/core/engine_adapter/redshift.py +4 -0
- sqlmesh/core/engine_adapter/risingwave.py +1 -0
- sqlmesh/core/engine_adapter/shared.py +6 -0
- sqlmesh/core/engine_adapter/snowflake.py +82 -11
- sqlmesh/core/engine_adapter/spark.py +14 -10
- sqlmesh/core/engine_adapter/trino.py +4 -2
- sqlmesh/core/janitor.py +181 -0
- sqlmesh/core/lineage.py +1 -0
- sqlmesh/core/macros.py +35 -13
- sqlmesh/core/model/common.py +2 -0
- sqlmesh/core/model/definition.py +65 -4
- sqlmesh/core/model/kind.py +66 -2
- sqlmesh/core/model/meta.py +107 -2
- sqlmesh/core/node.py +101 -2
- sqlmesh/core/plan/builder.py +15 -10
- sqlmesh/core/plan/common.py +196 -2
- sqlmesh/core/plan/definition.py +21 -6
- sqlmesh/core/plan/evaluator.py +72 -113
- sqlmesh/core/plan/explainer.py +90 -8
- sqlmesh/core/plan/stages.py +42 -21
- sqlmesh/core/renderer.py +26 -18
- sqlmesh/core/scheduler.py +60 -19
- sqlmesh/core/selector.py +137 -9
- sqlmesh/core/signal.py +64 -1
- sqlmesh/core/snapshot/__init__.py +1 -0
- sqlmesh/core/snapshot/definition.py +109 -25
- sqlmesh/core/snapshot/evaluator.py +610 -50
- sqlmesh/core/state_sync/__init__.py +0 -1
- sqlmesh/core/state_sync/base.py +31 -27
- sqlmesh/core/state_sync/cache.py +12 -4
- sqlmesh/core/state_sync/common.py +216 -111
- sqlmesh/core/state_sync/db/facade.py +30 -15
- sqlmesh/core/state_sync/db/interval.py +27 -7
- sqlmesh/core/state_sync/db/migrator.py +14 -8
- sqlmesh/core/state_sync/db/snapshot.py +119 -87
- sqlmesh/core/table_diff.py +2 -2
- sqlmesh/core/test/definition.py +14 -9
- sqlmesh/dbt/adapter.py +20 -11
- sqlmesh/dbt/basemodel.py +52 -41
- sqlmesh/dbt/builtin.py +27 -11
- sqlmesh/dbt/column.py +17 -5
- sqlmesh/dbt/common.py +4 -2
- sqlmesh/dbt/context.py +14 -1
- sqlmesh/dbt/loader.py +60 -8
- sqlmesh/dbt/manifest.py +136 -8
- sqlmesh/dbt/model.py +105 -25
- sqlmesh/dbt/package.py +16 -1
- sqlmesh/dbt/profile.py +3 -3
- sqlmesh/dbt/project.py +12 -7
- sqlmesh/dbt/seed.py +1 -1
- sqlmesh/dbt/source.py +6 -1
- sqlmesh/dbt/target.py +25 -6
- sqlmesh/dbt/test.py +31 -1
- sqlmesh/migrations/v0000_baseline.py +3 -6
- sqlmesh/migrations/v0061_mysql_fix_blob_text_type.py +2 -5
- sqlmesh/migrations/v0062_add_model_gateway.py +2 -2
- sqlmesh/migrations/v0063_change_signals.py +2 -4
- sqlmesh/migrations/v0064_join_when_matched_strings.py +2 -4
- sqlmesh/migrations/v0065_add_model_optimize.py +2 -2
- sqlmesh/migrations/v0066_add_auto_restatements.py +2 -6
- sqlmesh/migrations/v0067_add_tsql_date_full_precision.py +2 -2
- sqlmesh/migrations/v0068_include_unrendered_query_in_metadata_hash.py +2 -2
- sqlmesh/migrations/v0069_update_dev_table_suffix.py +2 -4
- sqlmesh/migrations/v0070_include_grains_in_metadata_hash.py +2 -2
- sqlmesh/migrations/v0071_add_dev_version_to_intervals.py +2 -6
- sqlmesh/migrations/v0072_add_environment_statements.py +2 -4
- sqlmesh/migrations/v0073_remove_symbolic_disable_restatement.py +2 -4
- sqlmesh/migrations/v0074_add_partition_by_time_column_property.py +2 -2
- sqlmesh/migrations/v0075_remove_validate_query.py +2 -4
- sqlmesh/migrations/v0076_add_cron_tz.py +2 -2
- sqlmesh/migrations/v0077_fix_column_type_hash_calculation.py +2 -2
- sqlmesh/migrations/v0078_warn_if_non_migratable_python_env.py +2 -4
- sqlmesh/migrations/v0079_add_gateway_managed_property.py +7 -9
- sqlmesh/migrations/v0080_add_batch_size_to_scd_type_2_models.py +2 -2
- sqlmesh/migrations/v0081_update_partitioned_by.py +2 -4
- sqlmesh/migrations/v0082_warn_if_incorrectly_duplicated_statements.py +2 -4
- sqlmesh/migrations/v0083_use_sql_for_scd_time_data_type_data_hash.py +2 -2
- sqlmesh/migrations/v0084_normalize_quote_when_matched_and_merge_filter.py +2 -2
- sqlmesh/migrations/v0085_deterministic_repr.py +2 -4
- sqlmesh/migrations/v0086_check_deterministic_bug.py +2 -4
- sqlmesh/migrations/v0087_normalize_blueprint_variables.py +2 -4
- sqlmesh/migrations/v0088_warn_about_variable_python_env_diffs.py +2 -4
- sqlmesh/migrations/v0089_add_virtual_environment_mode.py +2 -2
- sqlmesh/migrations/v0090_add_forward_only_column.py +2 -6
- sqlmesh/migrations/v0091_on_additive_change.py +2 -2
- sqlmesh/migrations/v0092_warn_about_dbt_data_type_diff.py +2 -4
- sqlmesh/migrations/v0093_use_raw_sql_in_fingerprint.py +2 -2
- sqlmesh/migrations/v0094_add_dev_version_and_fingerprint_columns.py +2 -6
- sqlmesh/migrations/v0095_warn_about_dbt_raw_sql_diff.py +2 -4
- sqlmesh/migrations/v0096_remove_plan_dags_table.py +2 -4
- sqlmesh/migrations/v0097_add_dbt_name_in_node.py +2 -2
- sqlmesh/migrations/v0098_add_dbt_node_info_in_node.py +103 -0
- sqlmesh/migrations/v0099_add_last_altered_to_intervals.py +25 -0
- sqlmesh/migrations/v0100_add_grants_and_grants_target_layer.py +9 -0
- sqlmesh/utils/__init__.py +8 -1
- sqlmesh/utils/cache.py +5 -1
- sqlmesh/utils/date.py +1 -1
- sqlmesh/utils/errors.py +4 -0
- sqlmesh/utils/jinja.py +25 -2
- sqlmesh/utils/pydantic.py +6 -6
- sqlmesh/utils/windows.py +13 -3
- {sqlmesh-0.217.1.dev1.dist-info → sqlmesh-0.227.2.dev4.dist-info}/METADATA +5 -5
- {sqlmesh-0.217.1.dev1.dist-info → sqlmesh-0.227.2.dev4.dist-info}/RECORD +181 -176
- sqlmesh_dbt/cli.py +70 -7
- sqlmesh_dbt/console.py +14 -6
- sqlmesh_dbt/operations.py +103 -24
- sqlmesh_dbt/selectors.py +39 -1
- web/client/dist/assets/{Audits-Ucsx1GzF.js → Audits-CBiYyyx-.js} +1 -1
- web/client/dist/assets/{Banner-BWDzvavM.js → Banner-DSRbUlO5.js} +1 -1
- web/client/dist/assets/{ChevronDownIcon-D2VL13Ah.js → ChevronDownIcon-MK_nrjD_.js} +1 -1
- web/client/dist/assets/{ChevronRightIcon-DWGYbf1l.js → ChevronRightIcon-CLWtT22Q.js} +1 -1
- web/client/dist/assets/{Content-DdHDZM3I.js → Content-BNuGZN5l.js} +1 -1
- web/client/dist/assets/{Content-Bikfy8fh.js → Content-CSHJyW0n.js} +1 -1
- web/client/dist/assets/{Data-CzAJH7rW.js → Data-C1oRDbLx.js} +1 -1
- web/client/dist/assets/{DataCatalog-BJF11g8f.js → DataCatalog-HXyX2-_j.js} +1 -1
- web/client/dist/assets/{Editor-s0SBpV2y.js → Editor-BDyfpUuw.js} +1 -1
- web/client/dist/assets/{Editor-DgLhgKnm.js → Editor-D0jNItwC.js} +1 -1
- web/client/dist/assets/{Errors-D0m0O1d3.js → Errors-BfuFLcPi.js} +1 -1
- web/client/dist/assets/{FileExplorer-CEv0vXkt.js → FileExplorer-BR9IE3he.js} +1 -1
- web/client/dist/assets/{Footer-BwzXn8Ew.js → Footer-CgBEtiAh.js} +1 -1
- web/client/dist/assets/{Header-6heDkEqG.js → Header-DSqR6nSO.js} +1 -1
- web/client/dist/assets/{Input-obuJsD6k.js → Input-B-oZ6fGO.js} +1 -1
- web/client/dist/assets/Lineage-DYQVwDbD.js +1 -0
- web/client/dist/assets/{ListboxShow-HM9_qyrt.js → ListboxShow-BE5-xevs.js} +1 -1
- web/client/dist/assets/{ModelLineage-zWdKo0U2.js → ModelLineage-DkIFAYo4.js} +1 -1
- web/client/dist/assets/{Models-Bcu66SRz.js → Models-D5dWr8RB.js} +1 -1
- web/client/dist/assets/{Page-BWEEQfIt.js → Page-C-XfU5BR.js} +1 -1
- web/client/dist/assets/{Plan-C4gXCqlf.js → Plan-ZEuTINBq.js} +1 -1
- web/client/dist/assets/{PlusCircleIcon-CVDO651q.js → PlusCircleIcon-DVXAHG8_.js} +1 -1
- web/client/dist/assets/{ReportErrors-BT6xFwAr.js → ReportErrors-B7FEPzMB.js} +1 -1
- web/client/dist/assets/{Root-ryJoBK4h.js → Root-8aZyhPxF.js} +1 -1
- web/client/dist/assets/{SearchList-DB04sPb9.js → SearchList-W_iT2G82.js} +1 -1
- web/client/dist/assets/{SelectEnvironment-CUYcXUu6.js → SelectEnvironment-C65jALmO.js} +1 -1
- web/client/dist/assets/{SourceList-Doo_9ZGp.js → SourceList-DSLO6nVJ.js} +1 -1
- web/client/dist/assets/{SourceListItem-D5Mj7Dly.js → SourceListItem-BHt8d9-I.js} +1 -1
- web/client/dist/assets/{SplitPane-qHmkD1qy.js → SplitPane-CViaZmw6.js} +1 -1
- web/client/dist/assets/{Tests-DH1Z74ML.js → Tests-DhaVt5t1.js} +1 -1
- web/client/dist/assets/{Welcome-DqUJUNMF.js → Welcome-DvpjH-_4.js} +1 -1
- web/client/dist/assets/context-BctCsyGb.js +71 -0
- web/client/dist/assets/{context-Dr54UHLi.js → context-DFNeGsFF.js} +1 -1
- web/client/dist/assets/{editor-DYIP1yQ4.js → editor-CcO28cqd.js} +1 -1
- web/client/dist/assets/{file-DarlIDVi.js → file-CvJN3aZO.js} +1 -1
- web/client/dist/assets/{floating-ui.react-dom-BH3TFvkM.js → floating-ui.react-dom-CjE-JNW1.js} +1 -1
- web/client/dist/assets/{help-Bl8wqaQc.js → help-DuPhjipa.js} +1 -1
- web/client/dist/assets/{index-D1sR7wpN.js → index-C-dJH7yZ.js} +1 -1
- web/client/dist/assets/{index-O3mjYpnE.js → index-Dj0i1-CA.js} +2 -2
- web/client/dist/assets/{plan-CehRrJUG.js → plan-BTRSbjKn.js} +1 -1
- web/client/dist/assets/{popover-CqgMRE0G.js → popover-_Sf0yvOI.js} +1 -1
- web/client/dist/assets/{project-6gxepOhm.js → project-BvSOI8MY.js} +1 -1
- web/client/dist/index.html +1 -1
- web/client/dist/assets/Lineage-D0Hgdz2v.js +0 -1
- web/client/dist/assets/context-DgX0fp2E.js +0 -68
- {sqlmesh-0.217.1.dev1.dist-info → sqlmesh-0.227.2.dev4.dist-info}/WHEEL +0 -0
- {sqlmesh-0.217.1.dev1.dist-info → sqlmesh-0.227.2.dev4.dist-info}/entry_points.txt +0 -0
- {sqlmesh-0.217.1.dev1.dist-info → sqlmesh-0.227.2.dev4.dist-info}/licenses/LICENSE +0 -0
- {sqlmesh-0.217.1.dev1.dist-info → sqlmesh-0.227.2.dev4.dist-info}/top_level.txt +0 -0
sqlmesh/core/scheduler.py
CHANGED
|
@@ -251,7 +251,9 @@ class Scheduler:
|
|
|
251
251
|
**kwargs,
|
|
252
252
|
)
|
|
253
253
|
|
|
254
|
-
self.state_sync.add_interval(
|
|
254
|
+
self.state_sync.add_interval(
|
|
255
|
+
snapshot, start, end, is_dev=not is_deployable, last_altered_ts=now_timestamp()
|
|
256
|
+
)
|
|
255
257
|
return audit_results
|
|
256
258
|
|
|
257
259
|
def run(
|
|
@@ -335,6 +337,7 @@ class Scheduler:
|
|
|
335
337
|
deployability_index: t.Optional[DeployabilityIndex],
|
|
336
338
|
environment_naming_info: EnvironmentNamingInfo,
|
|
337
339
|
dag: t.Optional[DAG[SnapshotId]] = None,
|
|
340
|
+
is_restatement: bool = False,
|
|
338
341
|
) -> t.Dict[Snapshot, Intervals]:
|
|
339
342
|
dag = dag or snapshots_to_dag(merged_intervals)
|
|
340
343
|
|
|
@@ -349,7 +352,7 @@ class Scheduler:
|
|
|
349
352
|
)
|
|
350
353
|
for snapshot, intervals in merged_intervals.items()
|
|
351
354
|
}
|
|
352
|
-
snapshot_batches = {}
|
|
355
|
+
snapshot_batches: t.Dict[Snapshot, Intervals] = {}
|
|
353
356
|
all_unready_intervals: t.Dict[str, set[Interval]] = {}
|
|
354
357
|
for snapshot_id in dag:
|
|
355
358
|
if snapshot_id not in snapshot_intervals:
|
|
@@ -361,12 +364,22 @@ class Scheduler:
|
|
|
361
364
|
|
|
362
365
|
adapter = self.snapshot_evaluator.get_adapter(snapshot.model_gateway)
|
|
363
366
|
|
|
367
|
+
parent_intervals: Intervals = []
|
|
368
|
+
for parent_id in snapshot.parents:
|
|
369
|
+
parent_snapshot, _ = snapshot_intervals.get(parent_id, (None, []))
|
|
370
|
+
if not parent_snapshot or parent_snapshot.is_external:
|
|
371
|
+
continue
|
|
372
|
+
|
|
373
|
+
parent_intervals.extend(snapshot_batches[parent_snapshot])
|
|
374
|
+
|
|
364
375
|
context = ExecutionContext(
|
|
365
376
|
adapter,
|
|
366
377
|
self.snapshots_by_name,
|
|
367
378
|
deployability_index,
|
|
368
379
|
default_dialect=adapter.dialect,
|
|
369
380
|
default_catalog=self.default_catalog,
|
|
381
|
+
is_restatement=is_restatement,
|
|
382
|
+
parent_intervals=parent_intervals,
|
|
370
383
|
)
|
|
371
384
|
|
|
372
385
|
intervals = self._check_ready_intervals(
|
|
@@ -422,6 +435,7 @@ class Scheduler:
|
|
|
422
435
|
run_environment_statements: bool = False,
|
|
423
436
|
audit_only: bool = False,
|
|
424
437
|
auto_restatement_triggers: t.Dict[SnapshotId, t.List[SnapshotId]] = {},
|
|
438
|
+
is_restatement: bool = False,
|
|
425
439
|
) -> t.Tuple[t.List[NodeExecutionFailedError[SchedulingUnit]], t.List[SchedulingUnit]]:
|
|
426
440
|
"""Runs precomputed batches of missing intervals.
|
|
427
441
|
|
|
@@ -455,9 +469,12 @@ class Scheduler:
|
|
|
455
469
|
snapshot_dag = full_dag.subdag(*selected_snapshot_ids_set)
|
|
456
470
|
|
|
457
471
|
batched_intervals = self.batch_intervals(
|
|
458
|
-
merged_intervals,
|
|
472
|
+
merged_intervals,
|
|
473
|
+
deployability_index,
|
|
474
|
+
environment_naming_info,
|
|
475
|
+
dag=snapshot_dag,
|
|
476
|
+
is_restatement=is_restatement,
|
|
459
477
|
)
|
|
460
|
-
|
|
461
478
|
self.console.start_evaluation_progress(
|
|
462
479
|
batched_intervals,
|
|
463
480
|
environment_naming_info,
|
|
@@ -530,6 +547,10 @@ class Scheduler:
|
|
|
530
547
|
execution_time=execution_time,
|
|
531
548
|
)
|
|
532
549
|
else:
|
|
550
|
+
# If batch_index > 0, then the target table must exist since the first batch would have created it
|
|
551
|
+
target_table_exists = (
|
|
552
|
+
snapshot.snapshot_id not in snapshots_to_create or node.batch_index > 0
|
|
553
|
+
)
|
|
533
554
|
audit_results = self.evaluate(
|
|
534
555
|
snapshot=snapshot,
|
|
535
556
|
environment_naming_info=environment_naming_info,
|
|
@@ -540,7 +561,7 @@ class Scheduler:
|
|
|
540
561
|
batch_index=node.batch_index,
|
|
541
562
|
allow_destructive_snapshots=allow_destructive_snapshots,
|
|
542
563
|
allow_additive_snapshots=allow_additive_snapshots,
|
|
543
|
-
target_table_exists=
|
|
564
|
+
target_table_exists=target_table_exists,
|
|
544
565
|
selected_models=selected_models,
|
|
545
566
|
)
|
|
546
567
|
|
|
@@ -638,6 +659,7 @@ class Scheduler:
|
|
|
638
659
|
}
|
|
639
660
|
snapshots_to_create = snapshots_to_create or set()
|
|
640
661
|
original_snapshots_to_create = snapshots_to_create.copy()
|
|
662
|
+
upstream_dependencies_cache: t.Dict[SnapshotId, t.Set[SchedulingUnit]] = {}
|
|
641
663
|
|
|
642
664
|
snapshot_dag = snapshot_dag or snapshots_to_dag(batches)
|
|
643
665
|
dag = DAG[SchedulingUnit]()
|
|
@@ -649,12 +671,15 @@ class Scheduler:
|
|
|
649
671
|
snapshot = self.snapshots_by_name[snapshot_id.name]
|
|
650
672
|
intervals = intervals_per_snapshot.get(snapshot.name, [])
|
|
651
673
|
|
|
652
|
-
upstream_dependencies: t.
|
|
674
|
+
upstream_dependencies: t.Set[SchedulingUnit] = set()
|
|
653
675
|
|
|
654
676
|
for p_sid in snapshot.parents:
|
|
655
|
-
upstream_dependencies.
|
|
677
|
+
upstream_dependencies.update(
|
|
656
678
|
self._find_upstream_dependencies(
|
|
657
|
-
p_sid,
|
|
679
|
+
p_sid,
|
|
680
|
+
intervals_per_snapshot,
|
|
681
|
+
original_snapshots_to_create,
|
|
682
|
+
upstream_dependencies_cache,
|
|
658
683
|
)
|
|
659
684
|
)
|
|
660
685
|
|
|
@@ -705,29 +730,42 @@ class Scheduler:
|
|
|
705
730
|
parent_sid: SnapshotId,
|
|
706
731
|
intervals_per_snapshot: t.Dict[str, Intervals],
|
|
707
732
|
snapshots_to_create: t.Set[SnapshotId],
|
|
708
|
-
|
|
733
|
+
cache: t.Dict[SnapshotId, t.Set[SchedulingUnit]],
|
|
734
|
+
) -> t.Set[SchedulingUnit]:
|
|
709
735
|
if parent_sid not in self.snapshots:
|
|
710
|
-
return
|
|
736
|
+
return set()
|
|
737
|
+
if parent_sid in cache:
|
|
738
|
+
return cache[parent_sid]
|
|
711
739
|
|
|
712
740
|
p_intervals = intervals_per_snapshot.get(parent_sid.name, [])
|
|
713
741
|
|
|
742
|
+
parent_node: t.Optional[SchedulingUnit] = None
|
|
714
743
|
if p_intervals:
|
|
715
744
|
if len(p_intervals) > 1:
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
745
|
+
parent_node = DummyNode(snapshot_name=parent_sid.name)
|
|
746
|
+
else:
|
|
747
|
+
interval = p_intervals[0]
|
|
748
|
+
parent_node = EvaluateNode(
|
|
749
|
+
snapshot_name=parent_sid.name, interval=interval, batch_index=0
|
|
750
|
+
)
|
|
751
|
+
elif parent_sid in snapshots_to_create:
|
|
752
|
+
parent_node = CreateNode(snapshot_name=parent_sid.name)
|
|
753
|
+
|
|
754
|
+
if parent_node is not None:
|
|
755
|
+
cache[parent_sid] = {parent_node}
|
|
756
|
+
return {parent_node}
|
|
757
|
+
|
|
721
758
|
# This snapshot has no intervals and doesn't need creation which means
|
|
722
759
|
# that it can be a transitive dependency
|
|
723
|
-
transitive_deps: t.
|
|
760
|
+
transitive_deps: t.Set[SchedulingUnit] = set()
|
|
724
761
|
parent_snapshot = self.snapshots[parent_sid]
|
|
725
762
|
for grandparent_sid in parent_snapshot.parents:
|
|
726
|
-
transitive_deps.
|
|
763
|
+
transitive_deps.update(
|
|
727
764
|
self._find_upstream_dependencies(
|
|
728
|
-
grandparent_sid, intervals_per_snapshot, snapshots_to_create
|
|
765
|
+
grandparent_sid, intervals_per_snapshot, snapshots_to_create, cache
|
|
729
766
|
)
|
|
730
767
|
)
|
|
768
|
+
cache[parent_sid] = transitive_deps
|
|
731
769
|
return transitive_deps
|
|
732
770
|
|
|
733
771
|
def _run_or_audit(
|
|
@@ -839,7 +877,9 @@ class Scheduler:
|
|
|
839
877
|
run_environment_statements=run_environment_statements,
|
|
840
878
|
audit_only=audit_only,
|
|
841
879
|
auto_restatement_triggers=auto_restatement_triggers,
|
|
842
|
-
selected_models={
|
|
880
|
+
selected_models={
|
|
881
|
+
s.node.dbt_unique_id for s in merged_intervals if s.node.dbt_unique_id
|
|
882
|
+
},
|
|
843
883
|
)
|
|
844
884
|
|
|
845
885
|
return CompletionStatus.FAILURE if errors else CompletionStatus.SUCCESS
|
|
@@ -954,6 +994,7 @@ class Scheduler:
|
|
|
954
994
|
python_env=signals.python_env,
|
|
955
995
|
dialect=snapshot.model.dialect,
|
|
956
996
|
path=snapshot.model._path,
|
|
997
|
+
snapshot=snapshot,
|
|
957
998
|
kwargs=kwargs,
|
|
958
999
|
)
|
|
959
1000
|
except SQLMeshError as e:
|
sqlmesh/core/selector.py
CHANGED
|
@@ -3,6 +3,8 @@ from __future__ import annotations
|
|
|
3
3
|
import fnmatch
|
|
4
4
|
import typing as t
|
|
5
5
|
from pathlib import Path
|
|
6
|
+
from itertools import zip_longest
|
|
7
|
+
import abc
|
|
6
8
|
|
|
7
9
|
from sqlglot import exp
|
|
8
10
|
from sqlglot.errors import ParseError
|
|
@@ -14,6 +16,7 @@ from sqlmesh.core import constants as c
|
|
|
14
16
|
from sqlmesh.core.dialect import normalize_model_name
|
|
15
17
|
from sqlmesh.core.environment import Environment
|
|
16
18
|
from sqlmesh.core.model import update_model_schemas
|
|
19
|
+
from sqlmesh.core.audit import StandaloneAudit
|
|
17
20
|
from sqlmesh.utils import UniqueKeyDict
|
|
18
21
|
from sqlmesh.utils.dag import DAG
|
|
19
22
|
from sqlmesh.utils.git import GitClient
|
|
@@ -23,10 +26,11 @@ from sqlmesh.utils.errors import SQLMeshError
|
|
|
23
26
|
if t.TYPE_CHECKING:
|
|
24
27
|
from typing_extensions import Literal as Lit # noqa
|
|
25
28
|
from sqlmesh.core.model import Model
|
|
29
|
+
from sqlmesh.core.node import Node
|
|
26
30
|
from sqlmesh.core.state_sync import StateReader
|
|
27
31
|
|
|
28
32
|
|
|
29
|
-
class Selector:
|
|
33
|
+
class Selector(abc.ABC):
|
|
30
34
|
def __init__(
|
|
31
35
|
self,
|
|
32
36
|
state_reader: StateReader,
|
|
@@ -165,20 +169,20 @@ class Selector:
|
|
|
165
169
|
return models
|
|
166
170
|
|
|
167
171
|
def expand_model_selections(
|
|
168
|
-
self, model_selections: t.Iterable[str], models: t.Optional[t.Dict[str,
|
|
172
|
+
self, model_selections: t.Iterable[str], models: t.Optional[t.Dict[str, Node]] = None
|
|
169
173
|
) -> t.Set[str]:
|
|
170
|
-
"""Expands a set of model selections into a set of model
|
|
174
|
+
"""Expands a set of model selections into a set of model fqns that can be looked up in the Context.
|
|
171
175
|
|
|
172
176
|
Args:
|
|
173
177
|
model_selections: A set of model selections.
|
|
174
178
|
|
|
175
179
|
Returns:
|
|
176
|
-
A set of model
|
|
180
|
+
A set of model fqns.
|
|
177
181
|
"""
|
|
178
182
|
|
|
179
183
|
node = parse(" | ".join(f"({s})" for s in model_selections))
|
|
180
184
|
|
|
181
|
-
all_models = models or self._models
|
|
185
|
+
all_models: t.Dict[str, Node] = models or dict(self._models)
|
|
182
186
|
models_by_tags: t.Dict[str, t.Set[str]] = {}
|
|
183
187
|
|
|
184
188
|
for fqn, model in all_models.items():
|
|
@@ -194,10 +198,9 @@ class Selector:
|
|
|
194
198
|
return {
|
|
195
199
|
fqn
|
|
196
200
|
for fqn, model in all_models.items()
|
|
197
|
-
if fnmatch.fnmatchcase(model
|
|
201
|
+
if fnmatch.fnmatchcase(self._model_name(model), node.this)
|
|
198
202
|
}
|
|
199
|
-
|
|
200
|
-
return {fqn} if fqn in all_models else set()
|
|
203
|
+
return self._pattern_to_model_fqns(pattern, all_models)
|
|
201
204
|
if isinstance(node, exp.And):
|
|
202
205
|
return evaluate(node.left) & evaluate(node.right)
|
|
203
206
|
if isinstance(node, exp.Or):
|
|
@@ -225,6 +228,13 @@ class Selector:
|
|
|
225
228
|
if fnmatch.fnmatchcase(tag, pattern)
|
|
226
229
|
}
|
|
227
230
|
return models_by_tags.get(pattern, set())
|
|
231
|
+
if isinstance(node, ResourceType):
|
|
232
|
+
resource_type = node.name.lower()
|
|
233
|
+
return {
|
|
234
|
+
fqn
|
|
235
|
+
for fqn, model in all_models.items()
|
|
236
|
+
if self._matches_resource_type(resource_type, model)
|
|
237
|
+
}
|
|
228
238
|
if isinstance(node, Direction):
|
|
229
239
|
selected = set()
|
|
230
240
|
|
|
@@ -241,6 +251,117 @@ class Selector:
|
|
|
241
251
|
|
|
242
252
|
return evaluate(node)
|
|
243
253
|
|
|
254
|
+
@abc.abstractmethod
|
|
255
|
+
def _model_name(self, model: Node) -> str:
|
|
256
|
+
"""Given a model, return the name that a selector pattern containing wildcards should be fnmatch'd on"""
|
|
257
|
+
pass
|
|
258
|
+
|
|
259
|
+
@abc.abstractmethod
|
|
260
|
+
def _pattern_to_model_fqns(self, pattern: str, all_models: t.Dict[str, Node]) -> t.Set[str]:
|
|
261
|
+
"""Given a pattern, return the keys of the matching models from :all_models"""
|
|
262
|
+
pass
|
|
263
|
+
|
|
264
|
+
@abc.abstractmethod
|
|
265
|
+
def _matches_resource_type(self, resource_type: str, model: Node) -> bool:
|
|
266
|
+
"""Indicate whether or not the supplied model matches the supplied resource type"""
|
|
267
|
+
pass
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
class NativeSelector(Selector):
|
|
271
|
+
"""Implementation of selectors that matches objects based on SQLMesh native names"""
|
|
272
|
+
|
|
273
|
+
def _model_name(self, model: Node) -> str:
|
|
274
|
+
return model.name
|
|
275
|
+
|
|
276
|
+
def _pattern_to_model_fqns(self, pattern: str, all_models: t.Dict[str, Node]) -> t.Set[str]:
|
|
277
|
+
fqn = normalize_model_name(pattern, self._default_catalog, self._dialect)
|
|
278
|
+
return {fqn} if fqn in all_models else set()
|
|
279
|
+
|
|
280
|
+
def _matches_resource_type(self, resource_type: str, model: Node) -> bool:
|
|
281
|
+
if resource_type == "model":
|
|
282
|
+
return model.is_model
|
|
283
|
+
if resource_type == "audit":
|
|
284
|
+
return isinstance(model, StandaloneAudit)
|
|
285
|
+
|
|
286
|
+
raise SQLMeshError(f"Unsupported resource type: {resource_type}")
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
class DbtSelector(Selector):
|
|
290
|
+
"""Implementation of selectors that matches objects based on the DBT names instead of the SQLMesh native names"""
|
|
291
|
+
|
|
292
|
+
def _model_name(self, model: Node) -> str:
|
|
293
|
+
if dbt_fqn := model.dbt_fqn:
|
|
294
|
+
return dbt_fqn
|
|
295
|
+
raise SQLMeshError("dbt node information must be populated to use dbt selectors")
|
|
296
|
+
|
|
297
|
+
def _pattern_to_model_fqns(self, pattern: str, all_models: t.Dict[str, Node]) -> t.Set[str]:
|
|
298
|
+
# a pattern like "staging.customers" should match a model called "jaffle_shop.staging.customers"
|
|
299
|
+
# but not a model called "jaffle_shop.customers.staging"
|
|
300
|
+
# also a pattern like "aging" should not match "staging" so we need to consider components; not substrings
|
|
301
|
+
pattern_components = pattern.split(".")
|
|
302
|
+
first_pattern_component = pattern_components[0]
|
|
303
|
+
matches = set()
|
|
304
|
+
for fqn, model in all_models.items():
|
|
305
|
+
if not model.dbt_fqn:
|
|
306
|
+
continue
|
|
307
|
+
|
|
308
|
+
dbt_fqn_components = model.dbt_fqn.split(".")
|
|
309
|
+
try:
|
|
310
|
+
starting_idx = dbt_fqn_components.index(first_pattern_component)
|
|
311
|
+
except ValueError:
|
|
312
|
+
continue
|
|
313
|
+
for pattern_component, fqn_component in zip_longest(
|
|
314
|
+
pattern_components, dbt_fqn_components[starting_idx:]
|
|
315
|
+
):
|
|
316
|
+
if pattern_component and not fqn_component:
|
|
317
|
+
# the pattern still has components left but we have run out of fqn components to match; no match
|
|
318
|
+
break
|
|
319
|
+
if fqn_component and not pattern_component:
|
|
320
|
+
# all elements of the pattern have matched elements of the fqn; match
|
|
321
|
+
matches.add(fqn)
|
|
322
|
+
break
|
|
323
|
+
if pattern_component != fqn_component:
|
|
324
|
+
# the pattern explicitly doesn't match a component; no match
|
|
325
|
+
break
|
|
326
|
+
else:
|
|
327
|
+
# called if no explicit break, indicating all components of the pattern matched all components of the fqn
|
|
328
|
+
matches.add(fqn)
|
|
329
|
+
return matches
|
|
330
|
+
|
|
331
|
+
def _matches_resource_type(self, resource_type: str, model: Node) -> bool:
|
|
332
|
+
"""
|
|
333
|
+
ref: https://docs.getdbt.com/reference/node-selection/methods#resource_type
|
|
334
|
+
|
|
335
|
+
# supported by SQLMesh
|
|
336
|
+
"model"
|
|
337
|
+
"seed"
|
|
338
|
+
"source" # external model
|
|
339
|
+
"test" # standalone audit
|
|
340
|
+
|
|
341
|
+
# not supported by SQLMesh yet, commented out to throw an error if someone tries to use them
|
|
342
|
+
"analysis"
|
|
343
|
+
"exposure"
|
|
344
|
+
"metric"
|
|
345
|
+
"saved_query"
|
|
346
|
+
"semantic_model"
|
|
347
|
+
"snapshot"
|
|
348
|
+
"unit_test"
|
|
349
|
+
"""
|
|
350
|
+
if resource_type not in ("model", "seed", "source", "test"):
|
|
351
|
+
raise SQLMeshError(f"Unsupported resource type: {resource_type}")
|
|
352
|
+
|
|
353
|
+
if isinstance(model, StandaloneAudit):
|
|
354
|
+
return resource_type == "test"
|
|
355
|
+
|
|
356
|
+
if resource_type == "model":
|
|
357
|
+
return model.is_model and not model.kind.is_external and not model.kind.is_seed
|
|
358
|
+
if resource_type == "source":
|
|
359
|
+
return model.kind.is_external
|
|
360
|
+
if resource_type == "seed":
|
|
361
|
+
return model.kind.is_seed
|
|
362
|
+
|
|
363
|
+
return False
|
|
364
|
+
|
|
244
365
|
|
|
245
366
|
class SelectorDialect(Dialect):
|
|
246
367
|
IDENTIFIERS_CAN_START_WITH_DIGIT = True
|
|
@@ -271,6 +392,10 @@ class Tag(exp.Expression):
|
|
|
271
392
|
pass
|
|
272
393
|
|
|
273
394
|
|
|
395
|
+
class ResourceType(exp.Expression):
|
|
396
|
+
pass
|
|
397
|
+
|
|
398
|
+
|
|
274
399
|
class Direction(exp.Expression):
|
|
275
400
|
pass
|
|
276
401
|
|
|
@@ -323,7 +448,8 @@ def parse(selector: str, dialect: DialectType = None) -> exp.Expression:
|
|
|
323
448
|
upstream = _match(TokenType.PLUS)
|
|
324
449
|
downstream = None
|
|
325
450
|
tag = _parse_kind("tag")
|
|
326
|
-
|
|
451
|
+
resource_type = False if tag else _parse_kind("resource_type")
|
|
452
|
+
git = False if resource_type else _parse_kind("git")
|
|
327
453
|
lstar = "*" if _match(TokenType.STAR) else ""
|
|
328
454
|
directions = {}
|
|
329
455
|
|
|
@@ -349,6 +475,8 @@ def parse(selector: str, dialect: DialectType = None) -> exp.Expression:
|
|
|
349
475
|
|
|
350
476
|
if tag:
|
|
351
477
|
this = Tag(this=this)
|
|
478
|
+
if resource_type:
|
|
479
|
+
this = ResourceType(this=this)
|
|
352
480
|
if git:
|
|
353
481
|
this = Git(this=this)
|
|
354
482
|
if directions:
|
sqlmesh/core/signal.py
CHANGED
|
@@ -1,7 +1,14 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
import typing as t
|
|
4
4
|
from sqlmesh.utils import UniqueKeyDict, registry_decorator
|
|
5
|
+
from sqlmesh.utils.errors import MissingSourceError
|
|
6
|
+
|
|
7
|
+
if t.TYPE_CHECKING:
|
|
8
|
+
from sqlmesh.core.context import ExecutionContext
|
|
9
|
+
from sqlmesh.core.snapshot.definition import Snapshot
|
|
10
|
+
from sqlmesh.utils.date import DatetimeRanges
|
|
11
|
+
from sqlmesh.core.snapshot.definition import DeployabilityIndex
|
|
5
12
|
|
|
6
13
|
|
|
7
14
|
class signal(registry_decorator):
|
|
@@ -33,3 +40,59 @@ class signal(registry_decorator):
|
|
|
33
40
|
|
|
34
41
|
|
|
35
42
|
SignalRegistry = UniqueKeyDict[str, signal]
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@signal()
|
|
46
|
+
def freshness(
|
|
47
|
+
batch: DatetimeRanges,
|
|
48
|
+
snapshot: Snapshot,
|
|
49
|
+
context: ExecutionContext,
|
|
50
|
+
) -> bool:
|
|
51
|
+
"""
|
|
52
|
+
Implements model freshness as a signal, i.e., it considers this model to be fresh if:
|
|
53
|
+
- Any upstream SQLMesh model has available intervals to compute i.e is fresh
|
|
54
|
+
- Any upstream external model has been altered since the last time the model was evaluated
|
|
55
|
+
"""
|
|
56
|
+
adapter = context.engine_adapter
|
|
57
|
+
if context.is_restatement or not adapter.SUPPORTS_METADATA_TABLE_LAST_MODIFIED_TS:
|
|
58
|
+
return True
|
|
59
|
+
|
|
60
|
+
deployability_index = context.deployability_index or DeployabilityIndex.all_deployable()
|
|
61
|
+
|
|
62
|
+
last_altered_ts = (
|
|
63
|
+
snapshot.last_altered_ts
|
|
64
|
+
if deployability_index.is_deployable(snapshot)
|
|
65
|
+
else snapshot.dev_last_altered_ts
|
|
66
|
+
)
|
|
67
|
+
|
|
68
|
+
if not last_altered_ts:
|
|
69
|
+
return True
|
|
70
|
+
|
|
71
|
+
parent_snapshots = {context.snapshots[p.name] for p in snapshot.parents}
|
|
72
|
+
|
|
73
|
+
upstream_parent_snapshots = {p for p in parent_snapshots if not p.is_external}
|
|
74
|
+
external_parents = snapshot.node.depends_on - {p.name for p in upstream_parent_snapshots}
|
|
75
|
+
|
|
76
|
+
if context.parent_intervals:
|
|
77
|
+
# At least one upstream sqlmesh model has intervals to compute (i.e is fresh),
|
|
78
|
+
# so the current model is considered fresh too
|
|
79
|
+
return True
|
|
80
|
+
|
|
81
|
+
if external_parents:
|
|
82
|
+
external_last_altered_timestamps = adapter.get_table_last_modified_ts(
|
|
83
|
+
list(external_parents)
|
|
84
|
+
)
|
|
85
|
+
|
|
86
|
+
if len(external_last_altered_timestamps) != len(external_parents):
|
|
87
|
+
raise MissingSourceError(
|
|
88
|
+
f"Expected {len(external_parents)} sources to be present, but got {len(external_last_altered_timestamps)}."
|
|
89
|
+
)
|
|
90
|
+
|
|
91
|
+
# Finding new data means that the upstream dependencies have been altered
|
|
92
|
+
# since the last time the model was evaluated
|
|
93
|
+
return any(
|
|
94
|
+
external_last_altered_ts > last_altered_ts
|
|
95
|
+
for external_last_altered_ts in external_last_altered_timestamps
|
|
96
|
+
)
|
|
97
|
+
|
|
98
|
+
return False
|
|
@@ -11,6 +11,7 @@ from sqlmesh.core.snapshot.definition import (
|
|
|
11
11
|
SnapshotId as SnapshotId,
|
|
12
12
|
SnapshotIdBatch as SnapshotIdBatch,
|
|
13
13
|
SnapshotIdLike as SnapshotIdLike,
|
|
14
|
+
SnapshotIdAndVersionLike as SnapshotIdAndVersionLike,
|
|
14
15
|
SnapshotInfoLike as SnapshotInfoLike,
|
|
15
16
|
SnapshotIntervals as SnapshotIntervals,
|
|
16
17
|
SnapshotNameVersion as SnapshotNameVersion,
|