sqlmesh 0.217.1.dev1__py3-none-any.whl → 0.227.2.dev20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (190)
  1. sqlmesh/__init__.py +12 -2
  2. sqlmesh/_version.py +2 -2
  3. sqlmesh/cli/project_init.py +10 -2
  4. sqlmesh/core/_typing.py +1 -0
  5. sqlmesh/core/audit/definition.py +8 -2
  6. sqlmesh/core/config/__init__.py +1 -1
  7. sqlmesh/core/config/connection.py +20 -5
  8. sqlmesh/core/config/dbt.py +13 -0
  9. sqlmesh/core/config/janitor.py +12 -0
  10. sqlmesh/core/config/loader.py +7 -0
  11. sqlmesh/core/config/model.py +2 -0
  12. sqlmesh/core/config/root.py +3 -0
  13. sqlmesh/core/console.py +80 -2
  14. sqlmesh/core/constants.py +1 -1
  15. sqlmesh/core/context.py +112 -35
  16. sqlmesh/core/dialect.py +3 -0
  17. sqlmesh/core/engine_adapter/_typing.py +2 -0
  18. sqlmesh/core/engine_adapter/base.py +330 -23
  19. sqlmesh/core/engine_adapter/base_postgres.py +17 -1
  20. sqlmesh/core/engine_adapter/bigquery.py +146 -7
  21. sqlmesh/core/engine_adapter/clickhouse.py +17 -13
  22. sqlmesh/core/engine_adapter/databricks.py +50 -2
  23. sqlmesh/core/engine_adapter/fabric.py +110 -29
  24. sqlmesh/core/engine_adapter/mixins.py +142 -48
  25. sqlmesh/core/engine_adapter/mssql.py +15 -4
  26. sqlmesh/core/engine_adapter/mysql.py +2 -2
  27. sqlmesh/core/engine_adapter/postgres.py +9 -3
  28. sqlmesh/core/engine_adapter/redshift.py +4 -0
  29. sqlmesh/core/engine_adapter/risingwave.py +1 -0
  30. sqlmesh/core/engine_adapter/shared.py +6 -0
  31. sqlmesh/core/engine_adapter/snowflake.py +82 -11
  32. sqlmesh/core/engine_adapter/spark.py +14 -10
  33. sqlmesh/core/engine_adapter/trino.py +5 -2
  34. sqlmesh/core/janitor.py +181 -0
  35. sqlmesh/core/lineage.py +1 -0
  36. sqlmesh/core/linter/rules/builtin.py +15 -0
  37. sqlmesh/core/loader.py +17 -30
  38. sqlmesh/core/macros.py +35 -13
  39. sqlmesh/core/model/common.py +2 -0
  40. sqlmesh/core/model/definition.py +72 -4
  41. sqlmesh/core/model/kind.py +66 -2
  42. sqlmesh/core/model/meta.py +107 -2
  43. sqlmesh/core/node.py +101 -2
  44. sqlmesh/core/plan/builder.py +15 -10
  45. sqlmesh/core/plan/common.py +196 -2
  46. sqlmesh/core/plan/definition.py +21 -6
  47. sqlmesh/core/plan/evaluator.py +72 -113
  48. sqlmesh/core/plan/explainer.py +90 -8
  49. sqlmesh/core/plan/stages.py +42 -21
  50. sqlmesh/core/renderer.py +26 -18
  51. sqlmesh/core/scheduler.py +60 -19
  52. sqlmesh/core/selector.py +137 -9
  53. sqlmesh/core/signal.py +64 -1
  54. sqlmesh/core/snapshot/__init__.py +1 -0
  55. sqlmesh/core/snapshot/definition.py +109 -25
  56. sqlmesh/core/snapshot/evaluator.py +610 -50
  57. sqlmesh/core/state_sync/__init__.py +0 -1
  58. sqlmesh/core/state_sync/base.py +31 -27
  59. sqlmesh/core/state_sync/cache.py +12 -4
  60. sqlmesh/core/state_sync/common.py +216 -111
  61. sqlmesh/core/state_sync/db/facade.py +30 -15
  62. sqlmesh/core/state_sync/db/interval.py +27 -7
  63. sqlmesh/core/state_sync/db/migrator.py +14 -8
  64. sqlmesh/core/state_sync/db/snapshot.py +119 -87
  65. sqlmesh/core/table_diff.py +2 -2
  66. sqlmesh/core/test/definition.py +14 -9
  67. sqlmesh/core/test/discovery.py +4 -0
  68. sqlmesh/dbt/adapter.py +20 -11
  69. sqlmesh/dbt/basemodel.py +52 -41
  70. sqlmesh/dbt/builtin.py +27 -11
  71. sqlmesh/dbt/column.py +17 -5
  72. sqlmesh/dbt/common.py +4 -2
  73. sqlmesh/dbt/context.py +14 -1
  74. sqlmesh/dbt/loader.py +60 -8
  75. sqlmesh/dbt/manifest.py +136 -8
  76. sqlmesh/dbt/model.py +105 -25
  77. sqlmesh/dbt/package.py +16 -1
  78. sqlmesh/dbt/profile.py +3 -3
  79. sqlmesh/dbt/project.py +12 -7
  80. sqlmesh/dbt/seed.py +1 -1
  81. sqlmesh/dbt/source.py +6 -1
  82. sqlmesh/dbt/target.py +25 -6
  83. sqlmesh/dbt/test.py +31 -1
  84. sqlmesh/integrations/github/cicd/controller.py +6 -2
  85. sqlmesh/lsp/context.py +4 -2
  86. sqlmesh/magics.py +1 -1
  87. sqlmesh/migrations/v0000_baseline.py +3 -6
  88. sqlmesh/migrations/v0061_mysql_fix_blob_text_type.py +2 -5
  89. sqlmesh/migrations/v0062_add_model_gateway.py +2 -2
  90. sqlmesh/migrations/v0063_change_signals.py +2 -4
  91. sqlmesh/migrations/v0064_join_when_matched_strings.py +2 -4
  92. sqlmesh/migrations/v0065_add_model_optimize.py +2 -2
  93. sqlmesh/migrations/v0066_add_auto_restatements.py +2 -6
  94. sqlmesh/migrations/v0067_add_tsql_date_full_precision.py +2 -2
  95. sqlmesh/migrations/v0068_include_unrendered_query_in_metadata_hash.py +2 -2
  96. sqlmesh/migrations/v0069_update_dev_table_suffix.py +2 -4
  97. sqlmesh/migrations/v0070_include_grains_in_metadata_hash.py +2 -2
  98. sqlmesh/migrations/v0071_add_dev_version_to_intervals.py +2 -6
  99. sqlmesh/migrations/v0072_add_environment_statements.py +2 -4
  100. sqlmesh/migrations/v0073_remove_symbolic_disable_restatement.py +2 -4
  101. sqlmesh/migrations/v0074_add_partition_by_time_column_property.py +2 -2
  102. sqlmesh/migrations/v0075_remove_validate_query.py +2 -4
  103. sqlmesh/migrations/v0076_add_cron_tz.py +2 -2
  104. sqlmesh/migrations/v0077_fix_column_type_hash_calculation.py +2 -2
  105. sqlmesh/migrations/v0078_warn_if_non_migratable_python_env.py +2 -4
  106. sqlmesh/migrations/v0079_add_gateway_managed_property.py +7 -9
  107. sqlmesh/migrations/v0080_add_batch_size_to_scd_type_2_models.py +2 -2
  108. sqlmesh/migrations/v0081_update_partitioned_by.py +2 -4
  109. sqlmesh/migrations/v0082_warn_if_incorrectly_duplicated_statements.py +2 -4
  110. sqlmesh/migrations/v0083_use_sql_for_scd_time_data_type_data_hash.py +2 -2
  111. sqlmesh/migrations/v0084_normalize_quote_when_matched_and_merge_filter.py +2 -2
  112. sqlmesh/migrations/v0085_deterministic_repr.py +2 -4
  113. sqlmesh/migrations/v0086_check_deterministic_bug.py +2 -4
  114. sqlmesh/migrations/v0087_normalize_blueprint_variables.py +2 -4
  115. sqlmesh/migrations/v0088_warn_about_variable_python_env_diffs.py +2 -4
  116. sqlmesh/migrations/v0089_add_virtual_environment_mode.py +2 -2
  117. sqlmesh/migrations/v0090_add_forward_only_column.py +2 -6
  118. sqlmesh/migrations/v0091_on_additive_change.py +2 -2
  119. sqlmesh/migrations/v0092_warn_about_dbt_data_type_diff.py +2 -4
  120. sqlmesh/migrations/v0093_use_raw_sql_in_fingerprint.py +2 -2
  121. sqlmesh/migrations/v0094_add_dev_version_and_fingerprint_columns.py +2 -6
  122. sqlmesh/migrations/v0095_warn_about_dbt_raw_sql_diff.py +2 -4
  123. sqlmesh/migrations/v0096_remove_plan_dags_table.py +2 -4
  124. sqlmesh/migrations/v0097_add_dbt_name_in_node.py +2 -2
  125. sqlmesh/migrations/v0098_add_dbt_node_info_in_node.py +103 -0
  126. sqlmesh/migrations/v0099_add_last_altered_to_intervals.py +25 -0
  127. sqlmesh/migrations/v0100_add_grants_and_grants_target_layer.py +9 -0
  128. sqlmesh/utils/__init__.py +8 -1
  129. sqlmesh/utils/cache.py +5 -1
  130. sqlmesh/utils/date.py +1 -1
  131. sqlmesh/utils/errors.py +4 -0
  132. sqlmesh/utils/git.py +3 -1
  133. sqlmesh/utils/jinja.py +25 -2
  134. sqlmesh/utils/pydantic.py +6 -6
  135. sqlmesh/utils/windows.py +13 -3
  136. {sqlmesh-0.217.1.dev1.dist-info → sqlmesh-0.227.2.dev20.dist-info}/METADATA +5 -5
  137. {sqlmesh-0.217.1.dev1.dist-info → sqlmesh-0.227.2.dev20.dist-info}/RECORD +188 -183
  138. sqlmesh_dbt/cli.py +70 -7
  139. sqlmesh_dbt/console.py +14 -6
  140. sqlmesh_dbt/operations.py +103 -24
  141. sqlmesh_dbt/selectors.py +39 -1
  142. web/client/dist/assets/{Audits-Ucsx1GzF.js → Audits-CBiYyyx-.js} +1 -1
  143. web/client/dist/assets/{Banner-BWDzvavM.js → Banner-DSRbUlO5.js} +1 -1
  144. web/client/dist/assets/{ChevronDownIcon-D2VL13Ah.js → ChevronDownIcon-MK_nrjD_.js} +1 -1
  145. web/client/dist/assets/{ChevronRightIcon-DWGYbf1l.js → ChevronRightIcon-CLWtT22Q.js} +1 -1
  146. web/client/dist/assets/{Content-DdHDZM3I.js → Content-BNuGZN5l.js} +1 -1
  147. web/client/dist/assets/{Content-Bikfy8fh.js → Content-CSHJyW0n.js} +1 -1
  148. web/client/dist/assets/{Data-CzAJH7rW.js → Data-C1oRDbLx.js} +1 -1
  149. web/client/dist/assets/{DataCatalog-BJF11g8f.js → DataCatalog-HXyX2-_j.js} +1 -1
  150. web/client/dist/assets/{Editor-s0SBpV2y.js → Editor-BDyfpUuw.js} +1 -1
  151. web/client/dist/assets/{Editor-DgLhgKnm.js → Editor-D0jNItwC.js} +1 -1
  152. web/client/dist/assets/{Errors-D0m0O1d3.js → Errors-BfuFLcPi.js} +1 -1
  153. web/client/dist/assets/{FileExplorer-CEv0vXkt.js → FileExplorer-BR9IE3he.js} +1 -1
  154. web/client/dist/assets/{Footer-BwzXn8Ew.js → Footer-CgBEtiAh.js} +1 -1
  155. web/client/dist/assets/{Header-6heDkEqG.js → Header-DSqR6nSO.js} +1 -1
  156. web/client/dist/assets/{Input-obuJsD6k.js → Input-B-oZ6fGO.js} +1 -1
  157. web/client/dist/assets/Lineage-DYQVwDbD.js +1 -0
  158. web/client/dist/assets/{ListboxShow-HM9_qyrt.js → ListboxShow-BE5-xevs.js} +1 -1
  159. web/client/dist/assets/{ModelLineage-zWdKo0U2.js → ModelLineage-DkIFAYo4.js} +1 -1
  160. web/client/dist/assets/{Models-Bcu66SRz.js → Models-D5dWr8RB.js} +1 -1
  161. web/client/dist/assets/{Page-BWEEQfIt.js → Page-C-XfU5BR.js} +1 -1
  162. web/client/dist/assets/{Plan-C4gXCqlf.js → Plan-ZEuTINBq.js} +1 -1
  163. web/client/dist/assets/{PlusCircleIcon-CVDO651q.js → PlusCircleIcon-DVXAHG8_.js} +1 -1
  164. web/client/dist/assets/{ReportErrors-BT6xFwAr.js → ReportErrors-B7FEPzMB.js} +1 -1
  165. web/client/dist/assets/{Root-ryJoBK4h.js → Root-8aZyhPxF.js} +1 -1
  166. web/client/dist/assets/{SearchList-DB04sPb9.js → SearchList-W_iT2G82.js} +1 -1
  167. web/client/dist/assets/{SelectEnvironment-CUYcXUu6.js → SelectEnvironment-C65jALmO.js} +1 -1
  168. web/client/dist/assets/{SourceList-Doo_9ZGp.js → SourceList-DSLO6nVJ.js} +1 -1
  169. web/client/dist/assets/{SourceListItem-D5Mj7Dly.js → SourceListItem-BHt8d9-I.js} +1 -1
  170. web/client/dist/assets/{SplitPane-qHmkD1qy.js → SplitPane-CViaZmw6.js} +1 -1
  171. web/client/dist/assets/{Tests-DH1Z74ML.js → Tests-DhaVt5t1.js} +1 -1
  172. web/client/dist/assets/{Welcome-DqUJUNMF.js → Welcome-DvpjH-_4.js} +1 -1
  173. web/client/dist/assets/context-BctCsyGb.js +71 -0
  174. web/client/dist/assets/{context-Dr54UHLi.js → context-DFNeGsFF.js} +1 -1
  175. web/client/dist/assets/{editor-DYIP1yQ4.js → editor-CcO28cqd.js} +1 -1
  176. web/client/dist/assets/{file-DarlIDVi.js → file-CvJN3aZO.js} +1 -1
  177. web/client/dist/assets/{floating-ui.react-dom-BH3TFvkM.js → floating-ui.react-dom-CjE-JNW1.js} +1 -1
  178. web/client/dist/assets/{help-Bl8wqaQc.js → help-DuPhjipa.js} +1 -1
  179. web/client/dist/assets/{index-D1sR7wpN.js → index-C-dJH7yZ.js} +1 -1
  180. web/client/dist/assets/{index-O3mjYpnE.js → index-Dj0i1-CA.js} +2 -2
  181. web/client/dist/assets/{plan-CehRrJUG.js → plan-BTRSbjKn.js} +1 -1
  182. web/client/dist/assets/{popover-CqgMRE0G.js → popover-_Sf0yvOI.js} +1 -1
  183. web/client/dist/assets/{project-6gxepOhm.js → project-BvSOI8MY.js} +1 -1
  184. web/client/dist/index.html +1 -1
  185. web/client/dist/assets/Lineage-D0Hgdz2v.js +0 -1
  186. web/client/dist/assets/context-DgX0fp2E.js +0 -68
  187. {sqlmesh-0.217.1.dev1.dist-info → sqlmesh-0.227.2.dev20.dist-info}/WHEEL +0 -0
  188. {sqlmesh-0.217.1.dev1.dist-info → sqlmesh-0.227.2.dev20.dist-info}/entry_points.txt +0 -0
  189. {sqlmesh-0.217.1.dev1.dist-info → sqlmesh-0.227.2.dev20.dist-info}/licenses/LICENSE +0 -0
  190. {sqlmesh-0.217.1.dev1.dist-info → sqlmesh-0.227.2.dev20.dist-info}/top_level.txt +0 -0
sqlmesh/core/plan/stages.py CHANGED
@@ -12,8 +12,9 @@ from sqlmesh.core.snapshot.definition import (
     Snapshot,
     SnapshotTableInfo,
     SnapshotId,
-    Interval,
+    snapshots_to_dag,
 )
+from sqlmesh.utils.errors import PlanError
 
 
 @dataclass
@@ -98,14 +99,19 @@ class AuditOnlyRunStage:
 
 @dataclass
 class RestatementStage:
-    """Restate intervals for given snapshots.
+    """Clear intervals from state for snapshots in *other* environments, when restatements are requested in prod.
+
+    This stage is effectively a "marker" stage to trigger the plan evaluator to perform the "clear intervals" logic after the BackfillStage has completed.
+    The "clear intervals" logic is executed just-in-time using the latest state available in order to pick up new snapshots that may have
+    been created while the BackfillStage was running, which is why we do not build a list of snapshots to clear at plan time and defer to evaluation time.
+
+    Note that this stage is only present on `prod` plans because dev plans do not need to worry about clearing intervals in other environments.
 
     Args:
-        snapshot_intervals: Intervals to restate.
-        all_snapshots: All snapshots in the plan by name.
+        all_snapshots: All snapshots in the plan by name. Note that this does not include the snapshots from other environments that will get their
+            intervals cleared; it's included here as an optimization to prevent having to re-fetch the current plan's snapshots.
     """
 
-    snapshot_intervals: t.Dict[SnapshotTableInfo, Interval]
     all_snapshots: t.Dict[str, Snapshot]
 
 
@@ -244,6 +250,7 @@ class PlanStagesBuilder:
         stored_snapshots = self.state_reader.get_snapshots(plan.environment.snapshots)
         snapshots = {**new_snapshots, **stored_snapshots}
         snapshots_by_name = {s.name: s for s in snapshots.values()}
+        dag = snapshots_to_dag(snapshots.values())
 
         all_selected_for_backfill_snapshots = {
             s.snapshot_id for s in snapshots.values() if plan.is_selected_for_backfill(s.name)
@@ -261,14 +268,21 @@ class PlanStagesBuilder:
         before_promote_snapshots = {
             s.snapshot_id
             for s in snapshots.values()
-            if deployability_index.is_representative(s)
+            if (deployability_index.is_representative(s) or s.is_seed)
             and plan.is_selected_for_backfill(s.name)
         }
         after_promote_snapshots = all_selected_for_backfill_snapshots - before_promote_snapshots
         deployability_index = DeployabilityIndex.all_deployable()
 
+        snapshot_ids_with_schema_migration = [
+            s.snapshot_id for s in snapshots.values() if s.requires_schema_migration_in_prod
+        ]
+        # Include all upstream dependencies of snapshots that require schema migration to make sure
+        # the upstream tables are created before the schema updates are applied
         snapshots_with_schema_migration = [
-            s for s in snapshots.values() if s.requires_schema_migration_in_prod
+            snapshots[s_id]
+            for s_id in dag.subdag(*snapshot_ids_with_schema_migration)
+            if snapshots[s_id].supports_schema_migration_in_prod
         ]
 
         snapshots_to_intervals = self._missing_intervals(
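
The new `snapshots_with_schema_migration` computation relies on `dag.subdag(...)` returning the selected snapshot ids together with everything upstream of them, per the comment in the hunk above. A toy sketch of that upstream closure (illustrative only; `upstream_closure` is a hypothetical stand-in, not SQLMesh's DAG API):

    from typing import Dict, Iterable, Set

    def upstream_closure(parents: Dict[str, Set[str]], roots: Iterable[str]) -> Set[str]:
        # Collect the roots plus all of their ancestors, walking parent edges.
        seen: Set[str] = set()
        stack = list(roots)
        while stack:
            node = stack.pop()
            if node in seen:
                continue
            seen.add(node)
            stack.extend(parents.get(node, ()))
        return seen

    # upstream_closure({"b": {"a"}, "c": {"b"}}, ["c"]) == {"a", "b", "c"}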
@@ -321,10 +335,6 @@ class PlanStagesBuilder:
         if audit_only_snapshots:
             stages.append(AuditOnlyRunStage(snapshots=list(audit_only_snapshots.values())))
 
-        restatement_stage = self._get_restatement_stage(plan, snapshots_by_name)
-        if restatement_stage:
-            stages.append(restatement_stage)
-
         if missing_intervals_before_promote:
             stages.append(
                 BackfillStage(
@@ -349,6 +359,15 @@ class PlanStagesBuilder:
             )
         )
 
+        # note: the "restatement stage" (which clears intervals in state - it does not actually perform the restatements; that's the backfill stage)
+        # needs to come *after* the backfill stage so that at no time do other plans / runs see empty prod intervals and compete with this plan to try to fill them.
+        # in addition, when we update intervals in state, we only clear intervals from dev snapshots to force dev models to be backfilled based on the new prod data.
+        # we can leave prod intervals alone because by the time this plan finishes, the intervals in state have not actually changed, since restatement replaces
+        # data for existing intervals and does not produce new ones
+        restatement_stage = self._get_restatement_stage(plan, snapshots_by_name)
+        if restatement_stage:
+            stages.append(restatement_stage)
+
         stages.append(
             EnvironmentRecordUpdateStage(
                 no_gaps_snapshot_names={s.name for s in before_promote_snapshots}
@@ -443,16 +462,18 @@ class PlanStagesBuilder:
     def _get_restatement_stage(
         self, plan: EvaluatablePlan, snapshots_by_name: t.Dict[str, Snapshot]
     ) -> t.Optional[RestatementStage]:
-        snapshot_intervals_to_restate = {}
-        for name, interval in plan.restatements.items():
-            restated_snapshot = snapshots_by_name[name]
-            restated_snapshot.remove_interval(interval)
-            snapshot_intervals_to_restate[restated_snapshot.table_info] = interval
-        if not snapshot_intervals_to_restate or plan.is_dev:
-            return None
-        return RestatementStage(
-            snapshot_intervals=snapshot_intervals_to_restate, all_snapshots=snapshots_by_name
-        )
+        if plan.restate_all_snapshots:
+            if plan.is_dev:
+                raise PlanError(
+                    "Clearing intervals from state across dev model versions is only valid for prod plans"
+                )
+
+        if plan.restatements:
+            return RestatementStage(
+                all_snapshots=snapshots_by_name,
+            )
+
+        return None
 
     def _get_physical_layer_update_stage(
         self,
sqlmesh/core/renderer.py CHANGED
@@ -6,7 +6,7 @@ from contextlib import contextmanager
 from functools import partial
 from pathlib import Path
 
-from sqlglot import exp, parse
+from sqlglot import exp, Dialect
 from sqlglot.errors import SqlglotError
 from sqlglot.helper import ensure_list
 from sqlglot.optimizer.annotate_types import annotate_types
@@ -196,7 +196,14 @@ class BaseExpressionRenderer:
             **kwargs,
         }
 
+        if this_model:
+            render_kwargs["this_model"] = this_model
+
+        macro_evaluator.locals.update(render_kwargs)
+
         variables = kwargs.pop("variables", {})
+        if variables:
+            macro_evaluator.locals.setdefault(c.SQLMESH_VARS, {}).update(variables)
 
         expressions = [self._expression]
         if isinstance(self._expression, d.Jinja):
@@ -249,23 +256,24 @@ class BaseExpressionRenderer:
             ) from ex
 
         if rendered_expression.strip():
-            try:
-                expressions = [e for e in parse(rendered_expression, read=self._dialect) if e]
-
-                if not expressions:
-                    raise ConfigError(f"Failed to parse an expression:\n{self._expression}")
-            except Exception as ex:
-                raise ConfigError(
-                    f"Could not parse the rendered jinja at '{self._path}'.\n{ex}"
-                ) from ex
-
-            if this_model:
-                render_kwargs["this_model"] = this_model
-
-            macro_evaluator.locals.update(render_kwargs)
-
-            if variables:
-                macro_evaluator.locals.setdefault(c.SQLMESH_VARS, {}).update(variables)
+            # ensure there is actual SQL and not just comments and non-SQL jinja
+            dialect = Dialect.get_or_raise(self._dialect)
+            tokens = dialect.tokenize(rendered_expression)
+
+            if tokens:
+                try:
+                    expressions = [
+                        e for e in dialect.parser().parse(tokens, rendered_expression) if e
+                    ]
+
+                    if not expressions:
+                        raise ConfigError(
+                            f"Failed to parse an expression:\n{rendered_expression}"
+                        )
+                except Exception as ex:
+                    raise ConfigError(
+                        f"Could not parse the rendered jinja at '{self._path}'.\n{ex}"
+                    ) from ex
 
         for definition in self._macro_definitions:
             try:
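
The renderer now tokenizes before parsing, so a template that renders to only comments or whitespace is skipped rather than treated as a parse failure. A sketch of the same calls in isolation, assuming sqlglot's `Dialect` API behaves as it is used in the hunk above:

    from sqlglot import Dialect

    dialect = Dialect.get_or_raise("duckdb")

    # A rendering that produced nothing but a comment yields no tokens,
    # so the new guard skips parsing instead of raising ConfigError.
    assert dialect.tokenize("-- nothing but a comment") == []

    # Real SQL still tokenizes and parses as before.
    sql = "SELECT 1 AS x"
    tokens = dialect.tokenize(sql)
    expressions = dialect.parser().parse(tokens, sql)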
sqlmesh/core/scheduler.py CHANGED
@@ -251,7 +251,9 @@ class Scheduler:
             **kwargs,
         )
 
-        self.state_sync.add_interval(snapshot, start, end, is_dev=not is_deployable)
+        self.state_sync.add_interval(
+            snapshot, start, end, is_dev=not is_deployable, last_altered_ts=now_timestamp()
+        )
         return audit_results
 
     def run(
@@ -335,6 +337,7 @@ class Scheduler:
         deployability_index: t.Optional[DeployabilityIndex],
         environment_naming_info: EnvironmentNamingInfo,
         dag: t.Optional[DAG[SnapshotId]] = None,
+        is_restatement: bool = False,
     ) -> t.Dict[Snapshot, Intervals]:
         dag = dag or snapshots_to_dag(merged_intervals)
 
@@ -349,7 +352,7 @@ class Scheduler:
             )
             for snapshot, intervals in merged_intervals.items()
         }
-        snapshot_batches = {}
+        snapshot_batches: t.Dict[Snapshot, Intervals] = {}
         all_unready_intervals: t.Dict[str, set[Interval]] = {}
         for snapshot_id in dag:
             if snapshot_id not in snapshot_intervals:
@@ -361,12 +364,22 @@ class Scheduler:
 
             adapter = self.snapshot_evaluator.get_adapter(snapshot.model_gateway)
 
+            parent_intervals: Intervals = []
+            for parent_id in snapshot.parents:
+                parent_snapshot, _ = snapshot_intervals.get(parent_id, (None, []))
+                if not parent_snapshot or parent_snapshot.is_external:
+                    continue
+
+                parent_intervals.extend(snapshot_batches[parent_snapshot])
+
             context = ExecutionContext(
                 adapter,
                 self.snapshots_by_name,
                 deployability_index,
                 default_dialect=adapter.dialect,
                 default_catalog=self.default_catalog,
+                is_restatement=is_restatement,
+                parent_intervals=parent_intervals,
             )
 
             intervals = self._check_ready_intervals(
422
435
  run_environment_statements: bool = False,
423
436
  audit_only: bool = False,
424
437
  auto_restatement_triggers: t.Dict[SnapshotId, t.List[SnapshotId]] = {},
438
+ is_restatement: bool = False,
425
439
  ) -> t.Tuple[t.List[NodeExecutionFailedError[SchedulingUnit]], t.List[SchedulingUnit]]:
426
440
  """Runs precomputed batches of missing intervals.
427
441
 
@@ -455,9 +469,12 @@ class Scheduler:
455
469
  snapshot_dag = full_dag.subdag(*selected_snapshot_ids_set)
456
470
 
457
471
  batched_intervals = self.batch_intervals(
458
- merged_intervals, deployability_index, environment_naming_info, dag=snapshot_dag
472
+ merged_intervals,
473
+ deployability_index,
474
+ environment_naming_info,
475
+ dag=snapshot_dag,
476
+ is_restatement=is_restatement,
459
477
  )
460
-
461
478
  self.console.start_evaluation_progress(
462
479
  batched_intervals,
463
480
  environment_naming_info,
@@ -530,6 +547,10 @@ class Scheduler:
                     execution_time=execution_time,
                 )
             else:
+                # If batch_index > 0, then the target table must exist since the first batch would have created it
+                target_table_exists = (
+                    snapshot.snapshot_id not in snapshots_to_create or node.batch_index > 0
+                )
                 audit_results = self.evaluate(
                     snapshot=snapshot,
                     environment_naming_info=environment_naming_info,
@@ -540,7 +561,7 @@ class Scheduler:
                     batch_index=node.batch_index,
                     allow_destructive_snapshots=allow_destructive_snapshots,
                     allow_additive_snapshots=allow_additive_snapshots,
-                    target_table_exists=snapshot.snapshot_id not in snapshots_to_create,
+                    target_table_exists=target_table_exists,
                     selected_models=selected_models,
                 )
 
@@ -638,6 +659,7 @@ class Scheduler:
         }
         snapshots_to_create = snapshots_to_create or set()
         original_snapshots_to_create = snapshots_to_create.copy()
+        upstream_dependencies_cache: t.Dict[SnapshotId, t.Set[SchedulingUnit]] = {}
 
         snapshot_dag = snapshot_dag or snapshots_to_dag(batches)
         dag = DAG[SchedulingUnit]()
@@ -649,12 +671,15 @@ class Scheduler:
             snapshot = self.snapshots_by_name[snapshot_id.name]
             intervals = intervals_per_snapshot.get(snapshot.name, [])
 
-            upstream_dependencies: t.List[SchedulingUnit] = []
+            upstream_dependencies: t.Set[SchedulingUnit] = set()
 
             for p_sid in snapshot.parents:
-                upstream_dependencies.extend(
+                upstream_dependencies.update(
                     self._find_upstream_dependencies(
-                        p_sid, intervals_per_snapshot, original_snapshots_to_create
+                        p_sid,
+                        intervals_per_snapshot,
+                        original_snapshots_to_create,
+                        upstream_dependencies_cache,
                     )
                 )
 
@@ -705,29 +730,42 @@ class Scheduler:
         parent_sid: SnapshotId,
         intervals_per_snapshot: t.Dict[str, Intervals],
         snapshots_to_create: t.Set[SnapshotId],
-    ) -> t.List[SchedulingUnit]:
+        cache: t.Dict[SnapshotId, t.Set[SchedulingUnit]],
+    ) -> t.Set[SchedulingUnit]:
         if parent_sid not in self.snapshots:
-            return []
+            return set()
+        if parent_sid in cache:
+            return cache[parent_sid]
 
         p_intervals = intervals_per_snapshot.get(parent_sid.name, [])
 
+        parent_node: t.Optional[SchedulingUnit] = None
         if p_intervals:
             if len(p_intervals) > 1:
-                return [DummyNode(snapshot_name=parent_sid.name)]
-            interval = p_intervals[0]
-            return [EvaluateNode(snapshot_name=parent_sid.name, interval=interval, batch_index=0)]
-        if parent_sid in snapshots_to_create:
-            return [CreateNode(snapshot_name=parent_sid.name)]
+                parent_node = DummyNode(snapshot_name=parent_sid.name)
+            else:
+                interval = p_intervals[0]
+                parent_node = EvaluateNode(
+                    snapshot_name=parent_sid.name, interval=interval, batch_index=0
+                )
+        elif parent_sid in snapshots_to_create:
+            parent_node = CreateNode(snapshot_name=parent_sid.name)
+
+        if parent_node is not None:
+            cache[parent_sid] = {parent_node}
+            return {parent_node}
+
         # This snapshot has no intervals and doesn't need creation which means
         # that it can be a transitive dependency
-        transitive_deps: t.List[SchedulingUnit] = []
+        transitive_deps: t.Set[SchedulingUnit] = set()
         parent_snapshot = self.snapshots[parent_sid]
         for grandparent_sid in parent_snapshot.parents:
-            transitive_deps.extend(
+            transitive_deps.update(
                 self._find_upstream_dependencies(
-                    grandparent_sid, intervals_per_snapshot, snapshots_to_create
+                    grandparent_sid, intervals_per_snapshot, snapshots_to_create, cache
                 )
            )
+        cache[parent_sid] = transitive_deps
         return transitive_deps
 
     def _run_or_audit(
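
The new `cache` parameter memoizes each parent's resolved scheduling units, so a shared ancestor is traversed once rather than once per downstream path, which can otherwise blow up exponentially in diamond-shaped DAGs. The pattern in isolation (illustrative sketch, not the method itself):

    from typing import Dict, Set

    def transitive_deps(node: str, parents: Dict[str, Set[str]], cache: Dict[str, Set[str]]) -> Set[str]:
        if node in cache:
            return cache[node]          # a shared ancestor is resolved only once
        deps: Set[str] = set()
        for parent in parents.get(node, set()):
            deps.add(parent)
            deps.update(transitive_deps(parent, parents, cache))
        cache[node] = deps
        return deps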
@@ -839,7 +877,9 @@ class Scheduler:
             run_environment_statements=run_environment_statements,
             audit_only=audit_only,
             auto_restatement_triggers=auto_restatement_triggers,
-            selected_models={s.node.dbt_name for s in merged_intervals if s.node.dbt_name},
+            selected_models={
+                s.node.dbt_unique_id for s in merged_intervals if s.node.dbt_unique_id
+            },
         )
 
         return CompletionStatus.FAILURE if errors else CompletionStatus.SUCCESS
@@ -954,6 +994,7 @@ class Scheduler:
                 python_env=signals.python_env,
                 dialect=snapshot.model.dialect,
                 path=snapshot.model._path,
+                snapshot=snapshot,
                 kwargs=kwargs,
             )
         except SQLMeshError as e:
sqlmesh/core/selector.py CHANGED
@@ -3,6 +3,8 @@ from __future__ import annotations
 import fnmatch
 import typing as t
 from pathlib import Path
+from itertools import zip_longest
+import abc
 
 from sqlglot import exp
 from sqlglot.errors import ParseError
@@ -14,6 +16,7 @@ from sqlmesh.core import constants as c
 from sqlmesh.core.dialect import normalize_model_name
 from sqlmesh.core.environment import Environment
 from sqlmesh.core.model import update_model_schemas
+from sqlmesh.core.audit import StandaloneAudit
 from sqlmesh.utils import UniqueKeyDict
 from sqlmesh.utils.dag import DAG
 from sqlmesh.utils.git import GitClient
@@ -23,10 +26,11 @@ from sqlmesh.utils.errors import SQLMeshError
 if t.TYPE_CHECKING:
     from typing_extensions import Literal as Lit  # noqa
     from sqlmesh.core.model import Model
+    from sqlmesh.core.node import Node
     from sqlmesh.core.state_sync import StateReader
 
 
-class Selector:
+class Selector(abc.ABC):
     def __init__(
         self,
         state_reader: StateReader,
@@ -165,20 +169,20 @@ class Selector:
         return models
 
     def expand_model_selections(
-        self, model_selections: t.Iterable[str], models: t.Optional[t.Dict[str, Model]] = None
+        self, model_selections: t.Iterable[str], models: t.Optional[t.Dict[str, Node]] = None
     ) -> t.Set[str]:
-        """Expands a set of model selections into a set of model names.
+        """Expands a set of model selections into a set of model fqns that can be looked up in the Context.
 
         Args:
             model_selections: A set of model selections.
 
         Returns:
-            A set of model names.
+            A set of model fqns.
         """
 
         node = parse(" | ".join(f"({s})" for s in model_selections))
 
-        all_models = models or self._models
+        all_models: t.Dict[str, Node] = models or dict(self._models)
         models_by_tags: t.Dict[str, t.Set[str]] = {}
 
         for fqn, model in all_models.items():
@@ -194,10 +198,9 @@ class Selector:
                 return {
                     fqn
                     for fqn, model in all_models.items()
-                    if fnmatch.fnmatchcase(model.name, node.this)
+                    if fnmatch.fnmatchcase(self._model_name(model), node.this)
                 }
-            fqn = normalize_model_name(pattern, self._default_catalog, self._dialect)
-            return {fqn} if fqn in all_models else set()
+            return self._pattern_to_model_fqns(pattern, all_models)
         if isinstance(node, exp.And):
             return evaluate(node.left) & evaluate(node.right)
         if isinstance(node, exp.Or):
@@ -225,6 +228,13 @@ class Selector:
                         if fnmatch.fnmatchcase(tag, pattern)
                     }
                 return models_by_tags.get(pattern, set())
+            if isinstance(node, ResourceType):
+                resource_type = node.name.lower()
+                return {
+                    fqn
+                    for fqn, model in all_models.items()
+                    if self._matches_resource_type(resource_type, model)
+                }
             if isinstance(node, Direction):
                 selected = set()
 
@@ -241,6 +251,117 @@ class Selector:
 
         return evaluate(node)
 
+    @abc.abstractmethod
+    def _model_name(self, model: Node) -> str:
+        """Given a model, return the name that a selector pattern containing wildcards should be fnmatch'd on"""
+        pass
+
+    @abc.abstractmethod
+    def _pattern_to_model_fqns(self, pattern: str, all_models: t.Dict[str, Node]) -> t.Set[str]:
+        """Given a pattern, return the keys of the matching models from :all_models"""
+        pass
+
+    @abc.abstractmethod
+    def _matches_resource_type(self, resource_type: str, model: Node) -> bool:
+        """Indicate whether or not the supplied model matches the supplied resource type"""
+        pass
+
+
+class NativeSelector(Selector):
+    """Implementation of selectors that matches objects based on SQLMesh native names"""
+
+    def _model_name(self, model: Node) -> str:
+        return model.name
+
+    def _pattern_to_model_fqns(self, pattern: str, all_models: t.Dict[str, Node]) -> t.Set[str]:
+        fqn = normalize_model_name(pattern, self._default_catalog, self._dialect)
+        return {fqn} if fqn in all_models else set()
+
+    def _matches_resource_type(self, resource_type: str, model: Node) -> bool:
+        if resource_type == "model":
+            return model.is_model
+        if resource_type == "audit":
+            return isinstance(model, StandaloneAudit)
+
+        raise SQLMeshError(f"Unsupported resource type: {resource_type}")
+
+
+class DbtSelector(Selector):
+    """Implementation of selectors that matches objects based on the dbt names instead of the SQLMesh native names"""
+
+    def _model_name(self, model: Node) -> str:
+        if dbt_fqn := model.dbt_fqn:
+            return dbt_fqn
+        raise SQLMeshError("dbt node information must be populated to use dbt selectors")
+
+    def _pattern_to_model_fqns(self, pattern: str, all_models: t.Dict[str, Node]) -> t.Set[str]:
+        # a pattern like "staging.customers" should match a model called "jaffle_shop.staging.customers"
+        # but not a model called "jaffle_shop.customers.staging"
+        # also a pattern like "aging" should not match "staging" so we need to consider components; not substrings
+        pattern_components = pattern.split(".")
+        first_pattern_component = pattern_components[0]
+        matches = set()
+        for fqn, model in all_models.items():
+            if not model.dbt_fqn:
+                continue
+
+            dbt_fqn_components = model.dbt_fqn.split(".")
+            try:
+                starting_idx = dbt_fqn_components.index(first_pattern_component)
+            except ValueError:
+                continue
+            for pattern_component, fqn_component in zip_longest(
+                pattern_components, dbt_fqn_components[starting_idx:]
+            ):
+                if pattern_component and not fqn_component:
+                    # the pattern still goes but we have run out of fqn components to match; no match
+                    break
+                if fqn_component and not pattern_component:
+                    # all elements of the pattern have matched elements of the fqn; match
+                    matches.add(fqn)
+                    break
+                if pattern_component != fqn_component:
+                    # the pattern explicitly doesn't match a component; no match
+                    break
+            else:
+                # reached if there is no explicit break, indicating all components of the pattern matched all components of the fqn
+                matches.add(fqn)
+        return matches
+
+    def _matches_resource_type(self, resource_type: str, model: Node) -> bool:
+        """
+        ref: https://docs.getdbt.com/reference/node-selection/methods#resource_type
+
+        # supported by SQLMesh
+        "model"
+        "seed"
+        "source"  # external model
+        "test"  # standalone audit
+
+        # not supported by SQLMesh yet, commented out to throw an error if someone tries to use them
+        "analysis"
+        "exposure"
+        "metric"
+        "saved_query"
+        "semantic_model"
+        "snapshot"
+        "unit_test"
+        """
+        if resource_type not in ("model", "seed", "source", "test"):
+            raise SQLMeshError(f"Unsupported resource type: {resource_type}")
+
+        if isinstance(model, StandaloneAudit):
+            return resource_type == "test"
+
+        if resource_type == "model":
+            return model.is_model and not model.kind.is_external and not model.kind.is_seed
+        if resource_type == "source":
+            return model.kind.is_external
+        if resource_type == "seed":
+            return model.kind.is_seed
+
+        return False
+
 
 class SelectorDialect(Dialect):
     IDENTIFIERS_CAN_START_WITH_DIGIT = True
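
The component-matching rule in `DbtSelector._pattern_to_model_fqns` can be restated as a standalone function (illustrative; `matches` is a hypothetical helper mirroring the logic above, including matching only from the first occurrence of the pattern's leading component):

    from itertools import zip_longest

    def matches(pattern: str, dbt_fqn: str) -> bool:
        p = pattern.split(".")
        f = dbt_fqn.split(".")
        try:
            start = f.index(p[0])  # first occurrence only, as above
        except ValueError:
            return False
        for pc, fc in zip_longest(p, f[start:]):
            if pc and not fc:
                return False       # pattern outruns the fqn components
            if fc and not pc:
                return True        # entire pattern consumed; match
            if pc != fc:
                return False
        return True                # pattern matched through the final component

    # matches("staging.customers", "jaffle_shop.staging.customers") -> True
    # matches("customers.staging", "jaffle_shop.staging.customers") -> False
    # matches("aging", "jaffle_shop.staging.customers")             -> False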
@@ -271,6 +392,10 @@ class Tag(exp.Expression):
     pass
 
 
+class ResourceType(exp.Expression):
+    pass
+
+
 class Direction(exp.Expression):
     pass
 
@@ -323,7 +448,8 @@ def parse(selector: str, dialect: DialectType = None) -> exp.Expression:
     upstream = _match(TokenType.PLUS)
     downstream = None
     tag = _parse_kind("tag")
-    git = False if tag else _parse_kind("git")
+    resource_type = False if tag else _parse_kind("resource_type")
+    git = False if resource_type else _parse_kind("git")
     lstar = "*" if _match(TokenType.STAR) else ""
     directions = {}
 
@@ -349,6 +475,8 @@ def parse(selector: str, dialect: DialectType = None) -> exp.Expression:
 
     if tag:
         this = Tag(this=this)
+    if resource_type:
+        this = ResourceType(this=this)
     if git:
         this = Git(this=this)
     if directions:
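
With this grammar change, `resource_type:` prefixes parse into `ResourceType` nodes that `expand_model_selections` evaluates via `_matches_resource_type`. A usage sketch against the module's own `parse` function, combining selections the same way `expand_model_selections` does (with " | "):

    from sqlmesh.core.selector import parse

    # Yields an expression tree over Tag(...) and ResourceType(...) nodes,
    # later walked by the evaluate() closure in expand_model_selections.
    tree = parse("(tag:finance) | (resource_type:seed)")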
sqlmesh/core/signal.py CHANGED
@@ -1,7 +1,14 @@
 from __future__ import annotations
 
-
+import typing as t
 from sqlmesh.utils import UniqueKeyDict, registry_decorator
+from sqlmesh.utils.errors import MissingSourceError
+
+if t.TYPE_CHECKING:
+    from sqlmesh.core.context import ExecutionContext
+    from sqlmesh.core.snapshot.definition import Snapshot
+    from sqlmesh.utils.date import DatetimeRanges
+    from sqlmesh.core.snapshot.definition import DeployabilityIndex
 
 
 class signal(registry_decorator):
@@ -33,3 +40,59 @@ class signal(registry_decorator):
 
 
 SignalRegistry = UniqueKeyDict[str, signal]
+
+
+@signal()
+def freshness(
+    batch: DatetimeRanges,
+    snapshot: Snapshot,
+    context: ExecutionContext,
+) -> bool:
+    """
+    Implements model freshness as a signal, i.e. it considers this model to be fresh if:
+    - Any upstream SQLMesh model has available intervals to compute, i.e. is fresh
+    - Any upstream external model has been altered since the last time the model was evaluated
+    """
+    adapter = context.engine_adapter
+    if context.is_restatement or not adapter.SUPPORTS_METADATA_TABLE_LAST_MODIFIED_TS:
+        return True
+
+    deployability_index = context.deployability_index or DeployabilityIndex.all_deployable()
+
+    last_altered_ts = (
+        snapshot.last_altered_ts
+        if deployability_index.is_deployable(snapshot)
+        else snapshot.dev_last_altered_ts
+    )
+
+    if not last_altered_ts:
+        return True
+
+    parent_snapshots = {context.snapshots[p.name] for p in snapshot.parents}
+
+    upstream_parent_snapshots = {p for p in parent_snapshots if not p.is_external}
+    external_parents = snapshot.node.depends_on - {p.name for p in upstream_parent_snapshots}
+
+    if context.parent_intervals:
+        # At least one upstream sqlmesh model has intervals to compute (i.e. is fresh),
+        # so the current model is considered fresh too
+        return True
+
+    if external_parents:
+        external_last_altered_timestamps = adapter.get_table_last_modified_ts(
+            list(external_parents)
+        )
+
+        if len(external_last_altered_timestamps) != len(external_parents):
+            raise MissingSourceError(
+                f"Expected {len(external_parents)} sources to be present, but got {len(external_last_altered_timestamps)}."
+            )
+
+        # Finding new data means that the upstream dependencies have been altered
+        # since the last time the model was evaluated
+        return any(
+            external_last_altered_ts > last_altered_ts
+            for external_last_altered_ts in external_last_altered_timestamps
+        )
+
+    return False
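
Since `freshness` is registered with the same `@signal()` decorator that user projects use, a custom signal can consume the new `ExecutionContext` fields the same way. A minimal sketch (the signal name and body are hypothetical; the argument names follow the contract shown in `freshness` above):

    from sqlmesh.core.signal import signal

    @signal()
    def upstream_changed(batch, snapshot, context) -> bool:
        # Always run during restatements, mirroring freshness above.
        if context.is_restatement:
            return True
        # Otherwise run only when a non-external parent has batches to compute.
        return bool(context.parent_intervals)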