PyPI - sqlmesh - Versions diffs - 0.217.1.dev1__py3-none-any.whl → 0.227.2.dev4__py3-none-any.whl - Mend

sqlmesh 0.217.1.dev1-py3-none-any.whl → 0.227.2.dev4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (183) hide show

sqlmesh/__init__.py +12 -2
sqlmesh/_version.py +2 -2
sqlmesh/cli/project_init.py +10 -2
sqlmesh/core/_typing.py +1 -0
sqlmesh/core/audit/definition.py +8 -2
sqlmesh/core/config/__init__.py +1 -1
sqlmesh/core/config/connection.py +17 -5
sqlmesh/core/config/dbt.py +13 -0
sqlmesh/core/config/janitor.py +12 -0
sqlmesh/core/config/loader.py +7 -0
sqlmesh/core/config/model.py +2 -0
sqlmesh/core/config/root.py +3 -0
sqlmesh/core/console.py +80 -2
sqlmesh/core/constants.py +1 -1
sqlmesh/core/context.py +61 -25
sqlmesh/core/dialect.py +3 -0
sqlmesh/core/engine_adapter/_typing.py +2 -0
sqlmesh/core/engine_adapter/base.py +322 -22
sqlmesh/core/engine_adapter/base_postgres.py +17 -1
sqlmesh/core/engine_adapter/bigquery.py +146 -7
sqlmesh/core/engine_adapter/clickhouse.py +17 -13
sqlmesh/core/engine_adapter/databricks.py +33 -2
sqlmesh/core/engine_adapter/fabric.py +1 -29
sqlmesh/core/engine_adapter/mixins.py +142 -48
sqlmesh/core/engine_adapter/mssql.py +15 -4
sqlmesh/core/engine_adapter/mysql.py +2 -2
sqlmesh/core/engine_adapter/postgres.py +9 -3
sqlmesh/core/engine_adapter/redshift.py +4 -0
sqlmesh/core/engine_adapter/risingwave.py +1 -0
sqlmesh/core/engine_adapter/shared.py +6 -0
sqlmesh/core/engine_adapter/snowflake.py +82 -11
sqlmesh/core/engine_adapter/spark.py +14 -10
sqlmesh/core/engine_adapter/trino.py +4 -2
sqlmesh/core/janitor.py +181 -0
sqlmesh/core/lineage.py +1 -0
sqlmesh/core/macros.py +35 -13
sqlmesh/core/model/common.py +2 -0
sqlmesh/core/model/definition.py +65 -4
sqlmesh/core/model/kind.py +66 -2
sqlmesh/core/model/meta.py +107 -2
sqlmesh/core/node.py +101 -2
sqlmesh/core/plan/builder.py +15 -10
sqlmesh/core/plan/common.py +196 -2
sqlmesh/core/plan/definition.py +21 -6
sqlmesh/core/plan/evaluator.py +72 -113
sqlmesh/core/plan/explainer.py +90 -8
sqlmesh/core/plan/stages.py +42 -21
sqlmesh/core/renderer.py +26 -18
sqlmesh/core/scheduler.py +60 -19
sqlmesh/core/selector.py +137 -9
sqlmesh/core/signal.py +64 -1
sqlmesh/core/snapshot/__init__.py +1 -0
sqlmesh/core/snapshot/definition.py +109 -25
sqlmesh/core/snapshot/evaluator.py +610 -50
sqlmesh/core/state_sync/__init__.py +0 -1
sqlmesh/core/state_sync/base.py +31 -27
sqlmesh/core/state_sync/cache.py +12 -4
sqlmesh/core/state_sync/common.py +216 -111
sqlmesh/core/state_sync/db/facade.py +30 -15
sqlmesh/core/state_sync/db/interval.py +27 -7
sqlmesh/core/state_sync/db/migrator.py +14 -8
sqlmesh/core/state_sync/db/snapshot.py +119 -87
sqlmesh/core/table_diff.py +2 -2
sqlmesh/core/test/definition.py +14 -9
sqlmesh/dbt/adapter.py +20 -11
sqlmesh/dbt/basemodel.py +52 -41
sqlmesh/dbt/builtin.py +27 -11
sqlmesh/dbt/column.py +17 -5
sqlmesh/dbt/common.py +4 -2
sqlmesh/dbt/context.py +14 -1
sqlmesh/dbt/loader.py +60 -8
sqlmesh/dbt/manifest.py +136 -8
sqlmesh/dbt/model.py +105 -25
sqlmesh/dbt/package.py +16 -1
sqlmesh/dbt/profile.py +3 -3
sqlmesh/dbt/project.py +12 -7
sqlmesh/dbt/seed.py +1 -1
sqlmesh/dbt/source.py +6 -1
sqlmesh/dbt/target.py +25 -6
sqlmesh/dbt/test.py +31 -1
sqlmesh/migrations/v0000_baseline.py +3 -6
sqlmesh/migrations/v0061_mysql_fix_blob_text_type.py +2 -5
sqlmesh/migrations/v0062_add_model_gateway.py +2 -2
sqlmesh/migrations/v0063_change_signals.py +2 -4
sqlmesh/migrations/v0064_join_when_matched_strings.py +2 -4
sqlmesh/migrations/v0065_add_model_optimize.py +2 -2
sqlmesh/migrations/v0066_add_auto_restatements.py +2 -6
sqlmesh/migrations/v0067_add_tsql_date_full_precision.py +2 -2
sqlmesh/migrations/v0068_include_unrendered_query_in_metadata_hash.py +2 -2
sqlmesh/migrations/v0069_update_dev_table_suffix.py +2 -4
sqlmesh/migrations/v0070_include_grains_in_metadata_hash.py +2 -2
sqlmesh/migrations/v0071_add_dev_version_to_intervals.py +2 -6
sqlmesh/migrations/v0072_add_environment_statements.py +2 -4
sqlmesh/migrations/v0073_remove_symbolic_disable_restatement.py +2 -4
sqlmesh/migrations/v0074_add_partition_by_time_column_property.py +2 -2
sqlmesh/migrations/v0075_remove_validate_query.py +2 -4
sqlmesh/migrations/v0076_add_cron_tz.py +2 -2
sqlmesh/migrations/v0077_fix_column_type_hash_calculation.py +2 -2
sqlmesh/migrations/v0078_warn_if_non_migratable_python_env.py +2 -4
sqlmesh/migrations/v0079_add_gateway_managed_property.py +7 -9
sqlmesh/migrations/v0080_add_batch_size_to_scd_type_2_models.py +2 -2
sqlmesh/migrations/v0081_update_partitioned_by.py +2 -4
sqlmesh/migrations/v0082_warn_if_incorrectly_duplicated_statements.py +2 -4
sqlmesh/migrations/v0083_use_sql_for_scd_time_data_type_data_hash.py +2 -2
sqlmesh/migrations/v0084_normalize_quote_when_matched_and_merge_filter.py +2 -2
sqlmesh/migrations/v0085_deterministic_repr.py +2 -4
sqlmesh/migrations/v0086_check_deterministic_bug.py +2 -4
sqlmesh/migrations/v0087_normalize_blueprint_variables.py +2 -4
sqlmesh/migrations/v0088_warn_about_variable_python_env_diffs.py +2 -4
sqlmesh/migrations/v0089_add_virtual_environment_mode.py +2 -2
sqlmesh/migrations/v0090_add_forward_only_column.py +2 -6
sqlmesh/migrations/v0091_on_additive_change.py +2 -2
sqlmesh/migrations/v0092_warn_about_dbt_data_type_diff.py +2 -4
sqlmesh/migrations/v0093_use_raw_sql_in_fingerprint.py +2 -2
sqlmesh/migrations/v0094_add_dev_version_and_fingerprint_columns.py +2 -6
sqlmesh/migrations/v0095_warn_about_dbt_raw_sql_diff.py +2 -4
sqlmesh/migrations/v0096_remove_plan_dags_table.py +2 -4
sqlmesh/migrations/v0097_add_dbt_name_in_node.py +2 -2
sqlmesh/migrations/v0098_add_dbt_node_info_in_node.py +103 -0
sqlmesh/migrations/v0099_add_last_altered_to_intervals.py +25 -0
sqlmesh/migrations/v0100_add_grants_and_grants_target_layer.py +9 -0
sqlmesh/utils/__init__.py +8 -1
sqlmesh/utils/cache.py +5 -1
sqlmesh/utils/date.py +1 -1
sqlmesh/utils/errors.py +4 -0
sqlmesh/utils/jinja.py +25 -2
sqlmesh/utils/pydantic.py +6 -6
sqlmesh/utils/windows.py +13 -3
{sqlmesh-0.217.1.dev1.dist-info → sqlmesh-0.227.2.dev4.dist-info}/METADATA +5 -5
{sqlmesh-0.217.1.dev1.dist-info → sqlmesh-0.227.2.dev4.dist-info}/RECORD +181 -176
sqlmesh_dbt/cli.py +70 -7
sqlmesh_dbt/console.py +14 -6
sqlmesh_dbt/operations.py +103 -24
sqlmesh_dbt/selectors.py +39 -1
web/client/dist/assets/{Audits-Ucsx1GzF.js → Audits-CBiYyyx-.js} +1 -1
web/client/dist/assets/{Banner-BWDzvavM.js → Banner-DSRbUlO5.js} +1 -1
web/client/dist/assets/{ChevronDownIcon-D2VL13Ah.js → ChevronDownIcon-MK_nrjD_.js} +1 -1
web/client/dist/assets/{ChevronRightIcon-DWGYbf1l.js → ChevronRightIcon-CLWtT22Q.js} +1 -1
web/client/dist/assets/{Content-DdHDZM3I.js → Content-BNuGZN5l.js} +1 -1
web/client/dist/assets/{Content-Bikfy8fh.js → Content-CSHJyW0n.js} +1 -1
web/client/dist/assets/{Data-CzAJH7rW.js → Data-C1oRDbLx.js} +1 -1
web/client/dist/assets/{DataCatalog-BJF11g8f.js → DataCatalog-HXyX2-_j.js} +1 -1
web/client/dist/assets/{Editor-s0SBpV2y.js → Editor-BDyfpUuw.js} +1 -1
web/client/dist/assets/{Editor-DgLhgKnm.js → Editor-D0jNItwC.js} +1 -1
web/client/dist/assets/{Errors-D0m0O1d3.js → Errors-BfuFLcPi.js} +1 -1
web/client/dist/assets/{FileExplorer-CEv0vXkt.js → FileExplorer-BR9IE3he.js} +1 -1
web/client/dist/assets/{Footer-BwzXn8Ew.js → Footer-CgBEtiAh.js} +1 -1
web/client/dist/assets/{Header-6heDkEqG.js → Header-DSqR6nSO.js} +1 -1
web/client/dist/assets/{Input-obuJsD6k.js → Input-B-oZ6fGO.js} +1 -1
web/client/dist/assets/Lineage-DYQVwDbD.js +1 -0
web/client/dist/assets/{ListboxShow-HM9_qyrt.js → ListboxShow-BE5-xevs.js} +1 -1
web/client/dist/assets/{ModelLineage-zWdKo0U2.js → ModelLineage-DkIFAYo4.js} +1 -1
web/client/dist/assets/{Models-Bcu66SRz.js → Models-D5dWr8RB.js} +1 -1
web/client/dist/assets/{Page-BWEEQfIt.js → Page-C-XfU5BR.js} +1 -1
web/client/dist/assets/{Plan-C4gXCqlf.js → Plan-ZEuTINBq.js} +1 -1
web/client/dist/assets/{PlusCircleIcon-CVDO651q.js → PlusCircleIcon-DVXAHG8_.js} +1 -1
web/client/dist/assets/{ReportErrors-BT6xFwAr.js → ReportErrors-B7FEPzMB.js} +1 -1
web/client/dist/assets/{Root-ryJoBK4h.js → Root-8aZyhPxF.js} +1 -1
web/client/dist/assets/{SearchList-DB04sPb9.js → SearchList-W_iT2G82.js} +1 -1
web/client/dist/assets/{SelectEnvironment-CUYcXUu6.js → SelectEnvironment-C65jALmO.js} +1 -1
web/client/dist/assets/{SourceList-Doo_9ZGp.js → SourceList-DSLO6nVJ.js} +1 -1
web/client/dist/assets/{SourceListItem-D5Mj7Dly.js → SourceListItem-BHt8d9-I.js} +1 -1
web/client/dist/assets/{SplitPane-qHmkD1qy.js → SplitPane-CViaZmw6.js} +1 -1
web/client/dist/assets/{Tests-DH1Z74ML.js → Tests-DhaVt5t1.js} +1 -1
web/client/dist/assets/{Welcome-DqUJUNMF.js → Welcome-DvpjH-_4.js} +1 -1
web/client/dist/assets/context-BctCsyGb.js +71 -0
web/client/dist/assets/{context-Dr54UHLi.js → context-DFNeGsFF.js} +1 -1
web/client/dist/assets/{editor-DYIP1yQ4.js → editor-CcO28cqd.js} +1 -1
web/client/dist/assets/{file-DarlIDVi.js → file-CvJN3aZO.js} +1 -1
web/client/dist/assets/{floating-ui.react-dom-BH3TFvkM.js → floating-ui.react-dom-CjE-JNW1.js} +1 -1
web/client/dist/assets/{help-Bl8wqaQc.js → help-DuPhjipa.js} +1 -1
web/client/dist/assets/{index-D1sR7wpN.js → index-C-dJH7yZ.js} +1 -1
web/client/dist/assets/{index-O3mjYpnE.js → index-Dj0i1-CA.js} +2 -2
web/client/dist/assets/{plan-CehRrJUG.js → plan-BTRSbjKn.js} +1 -1
web/client/dist/assets/{popover-CqgMRE0G.js → popover-_Sf0yvOI.js} +1 -1
web/client/dist/assets/{project-6gxepOhm.js → project-BvSOI8MY.js} +1 -1
web/client/dist/index.html +1 -1
web/client/dist/assets/Lineage-D0Hgdz2v.js +0 -1
web/client/dist/assets/context-DgX0fp2E.js +0 -68
{sqlmesh-0.217.1.dev1.dist-info → sqlmesh-0.227.2.dev4.dist-info}/WHEEL +0 -0
{sqlmesh-0.217.1.dev1.dist-info → sqlmesh-0.227.2.dev4.dist-info}/entry_points.txt +0 -0
{sqlmesh-0.217.1.dev1.dist-info → sqlmesh-0.227.2.dev4.dist-info}/licenses/LICENSE +0 -0
{sqlmesh-0.217.1.dev1.dist-info → sqlmesh-0.227.2.dev4.dist-info}/top_level.txt +0 -0

sqlmesh/core/plan/evaluator.py CHANGED Viewed

@@ -22,7 +22,7 @@ from sqlmesh.core import constants as c
 from sqlmesh.core.console import Console, get_console
 from sqlmesh.core.environment import EnvironmentNamingInfo, execute_environment_statements
 from sqlmesh.core.macros import RuntimeStage
-from sqlmesh.core.snapshot.definition import Interval, to_view_mapping
+from sqlmesh.core.snapshot.definition import to_view_mapping, SnapshotTableInfo
 from sqlmesh.core.plan import stages
 from sqlmesh.core.plan.definition import EvaluatablePlan
 from sqlmesh.core.scheduler import Scheduler
@@ -33,17 +33,15 @@ from sqlmesh.core.snapshot import (
     SnapshotIntervals,
     SnapshotId,
     SnapshotInfoLike,
-    SnapshotTableInfo,
     SnapshotCreationFailedError,
-    SnapshotNameVersion,
 )
 from sqlmesh.utils import to_snake_case
 from sqlmesh.core.state_sync import StateSync
+from sqlmesh.core.plan.common import identify_restatement_intervals_across_snapshot_versions
 from sqlmesh.utils import CorrelationId
 from sqlmesh.utils.concurrency import NodeExecutionFailedError
-from sqlmesh.utils.errors import PlanError, SQLMeshError
-from sqlmesh.utils.dag import DAG
-from sqlmesh.utils.date import now
+from sqlmesh.utils.errors import PlanError, ConflictingPlanError, SQLMeshError
+from sqlmesh.utils.date import now, to_timestamp
 logger = logging.getLogger(__name__)
@@ -260,6 +258,7 @@ class BuiltInPlanEvaluator(PlanEvaluator):
             allow_additive_snapshots=plan.allow_additive_models,
             selected_snapshot_ids=stage.selected_snapshot_ids,
             selected_models=plan.selected_models,
+            is_restatement=bool(plan.restatements),
         )
         if errors:
             raise PlanError("Plan application failed.")
@@ -289,27 +288,78 @@ class BuiltInPlanEvaluator(PlanEvaluator):
     def visit_restatement_stage(
         self, stage: stages.RestatementStage, plan: EvaluatablePlan
     ) -> None:
-        snapshot_intervals_to_restate = {(s, i) for s, i in stage.snapshot_intervals.items()}
-        # Restating intervals on prod plans should mean that the intervals are cleared across
-        # all environments, not just the version currently in prod
-        # This ensures that work done in dev environments can still be promoted to prod
-        # by forcing dev environments to re-run intervals that changed in prod
+        # Restating intervals on prod plans means that once the data for the intervals being restated has been backfilled
+        # (which happens in the backfill stage) then we need to clear those intervals *from state* across all other environments.
+        #
+        # This ensures that work done in dev environments can still be promoted to prod by forcing dev environments to
+        # re-run intervals that changed in prod (because after this stage runs they are cleared from state and thus show as missing)
+        #
+        # It also means that any new dev environments created while this restatement plan was running also get the
+        # correct intervals cleared because we look up matching snapshots as at right now and not as at the time the plan
+        # was created, which could have been several hours ago if there was a lot of data to restate.
         #
         # Without this rule, its possible that promoting a dev table to prod will introduce old data to prod
-        snapshot_intervals_to_restate.update(
-            self._restatement_intervals_across_all_environments(
-                prod_restatements=plan.restatements,
-                disable_restatement_models=plan.disabled_restatement_models,
-                loaded_snapshots={s.snapshot_id: s for s in stage.all_snapshots.values()},
-            )
-        )
-        self.state_sync.remove_intervals(
-            snapshot_intervals=list(snapshot_intervals_to_restate),
-            remove_shared_versions=plan.is_prod,
+        intervals_to_clear = identify_restatement_intervals_across_snapshot_versions(
+            state_reader=self.state_sync,
+            prod_restatements=plan.restatements,
+            disable_restatement_models=plan.disabled_restatement_models,
+            loaded_snapshots={s.snapshot_id: s for s in stage.all_snapshots.values()},
+            current_ts=to_timestamp(plan.execution_time or now()),
         )
+        if not intervals_to_clear:
+            # Nothing to do
+            return
+        # While the restatements were being processed, did any of the snapshots being restated get new versions deployed?
+        # If they did, they will not reflect the data that just got restated, so we need to notify the user
+        deployed_during_restatement: t.Dict[
+            str, t.Tuple[SnapshotTableInfo, SnapshotTableInfo]
+        ] = {}  # tuple of (restated_snapshot, current_prod_snapshot)
+        if deployed_env := self.state_sync.get_environment(plan.environment.name):
+            promoted_snapshots_by_name = {s.name: s for s in deployed_env.snapshots}
+            for name in plan.restatements:
+                snapshot = stage.all_snapshots[name]
+                version = snapshot.table_info.version
+                if (
+                    prod_snapshot := promoted_snapshots_by_name.get(name)
+                ) and prod_snapshot.version != version:
+                    deployed_during_restatement[name] = (
+                        snapshot.table_info,
+                        prod_snapshot.table_info,
+                    )
+        # we need to *not* clear the intervals on the snapshots where new versions were deployed while the restatement was running in order to prevent
+        # subsequent plans from having unexpected intervals to backfill.
+        # we instead list the affected models and abort the plan with an error so the user can decide what to do
+        # (either re-attempt the restatement plan or leave things as they are)
+        filtered_intervals_to_clear = [
+            (s.snapshot, s.interval)
+            for s in intervals_to_clear.values()
+            if s.snapshot.name not in deployed_during_restatement
+        ]
+        if filtered_intervals_to_clear:
+            # We still clear intervals in other envs for models that were successfully restated without having new versions promoted during restatement
+            self.state_sync.remove_intervals(
+                snapshot_intervals=filtered_intervals_to_clear,
+                remove_shared_versions=plan.is_prod,
+            )
+        if deployed_env and deployed_during_restatement:
+            self.console.log_models_updated_during_restatement(
+                list(deployed_during_restatement.values()),
+                plan.environment.naming_info,
+                self.default_catalog,
+            )
+            raise ConflictingPlanError(
+                f"Another plan ({deployed_env.summary.plan_id}) deployed new versions of {len(deployed_during_restatement)} models in the target environment '{plan.environment.name}' while they were being restated by this plan.\n"
+                "Please re-apply your plan if these new versions should be restated."
+            )
     def visit_environment_record_update_stage(
         self, stage: stages.EnvironmentRecordUpdateStage, plan: EvaluatablePlan
     ) -> None:
@@ -422,97 +472,6 @@ class BuiltInPlanEvaluator(PlanEvaluator):
             on_complete=on_complete,
         )
-    def _restatement_intervals_across_all_environments(
-        self,
-        prod_restatements: t.Dict[str, Interval],
-        disable_restatement_models: t.Set[str],
-        loaded_snapshots: t.Dict[SnapshotId, Snapshot],
-    ) -> t.Set[t.Tuple[SnapshotTableInfo, Interval]]:
-        """
-        Given a map of snapshot names + intervals to restate in prod:
-         - Look up matching snapshots across all environments (match based on name - regardless of version)
-         - For each match, also match downstream snapshots while filtering out models that have restatement disabled
-         - Return all matches mapped to the intervals of the prod snapshot being restated
-        The goal here is to produce a list of intervals to invalidate across all environments so that a cadence
-        run in those environments causes the intervals to be repopulated
-        """
-        if not prod_restatements:
-            return set()
-        prod_name_versions: t.Set[SnapshotNameVersion] = {
-            s.name_version for s in loaded_snapshots.values()
-        }
-        snapshots_to_restate: t.Dict[SnapshotId, t.Tuple[SnapshotTableInfo, Interval]] = {}
-        for env_summary in self.state_sync.get_environments_summary():
-            # Fetch the full environment object one at a time to avoid loading all environments into memory at once
-            env = self.state_sync.get_environment(env_summary.name)
-            if not env:
-                logger.warning("Environment %s not found", env_summary.name)
-                continue
-            keyed_snapshots = {s.name: s.table_info for s in env.snapshots}
-            # We dont just restate matching snapshots, we also have to restate anything downstream of them
-            # so that if A gets restated in prod and dev has A <- B <- C, B and C get restated in dev
-            env_dag = DAG({s.name: {p.name for p in s.parents} for s in env.snapshots})
-            for restatement, intervals in prod_restatements.items():
-                if restatement not in keyed_snapshots:
-                    continue
-                affected_snapshot_names = [
-                    x
-                    for x in ([restatement] + env_dag.downstream(restatement))
-                    if x not in disable_restatement_models
-                ]
-                snapshots_to_restate.update(
-                    {
-                        keyed_snapshots[a].snapshot_id: (keyed_snapshots[a], intervals)
-                        for a in affected_snapshot_names
-                        # Don't restate a snapshot if it shares the version with a snapshot in prod
-                        if keyed_snapshots[a].name_version not in prod_name_versions
-                    }
-                )
-        # for any affected full_history_restatement_only snapshots, we need to widen the intervals being restated to
-        # include the whole time range for that snapshot. This requires a call to state to load the full snapshot record,
-        # so we only do it if necessary
-        full_history_restatement_snapshot_ids = [
-            # FIXME: full_history_restatement_only is just one indicator that the snapshot can only be fully refreshed, the other one is Model.depends_on_self
-            # however, to figure out depends_on_self, we have to render all the model queries which, alongside having to fetch full snapshots from state,
-            # is problematic in secure environments that are deliberately isolated from arbitrary user code (since rendering a query may require user macros to be present)
-            # So for now, these are not considered
-            s_id
-            for s_id, s in snapshots_to_restate.items()
-            if s[0].full_history_restatement_only
-        ]
-        if full_history_restatement_snapshot_ids:
-            # only load full snapshot records that we havent already loaded
-            additional_snapshots = self.state_sync.get_snapshots(
-                [
-                    s.snapshot_id
-                    for s in full_history_restatement_snapshot_ids
-                    if s.snapshot_id not in loaded_snapshots
-                ]
-            )
-            all_snapshots = loaded_snapshots | additional_snapshots
-            for full_snapshot_id in full_history_restatement_snapshot_ids:
-                full_snapshot = all_snapshots[full_snapshot_id]
-                _, original_intervals = snapshots_to_restate[full_snapshot_id]
-                original_start, original_end = original_intervals
-                # get_removal_interval() widens intervals if necessary
-                new_intervals = full_snapshot.get_removal_interval(
-                    start=original_start, end=original_end
-                )
-                snapshots_to_restate[full_snapshot_id] = (full_snapshot.table_info, new_intervals)
-        return set(snapshots_to_restate.values())
     def _update_intervals_for_new_snapshots(self, snapshots: t.Collection[Snapshot]) -> None:
         snapshots_intervals: t.List[SnapshotIntervals] = []
         for snapshot in snapshots:

sqlmesh/core/plan/explainer.py CHANGED Viewed

@@ -1,6 +1,10 @@
+from __future__ import annotations
 import abc
 import typing as t
 import logging
+from dataclasses import dataclass
+from collections import defaultdict
 from rich.console import Console as RichConsole
 from rich.tree import Tree
@@ -8,6 +12,10 @@ from sqlglot.dialects.dialect import DialectType
 from sqlmesh.core import constants as c
 from sqlmesh.core.console import Console, TerminalConsole, get_console
 from sqlmesh.core.environment import EnvironmentNamingInfo
+from sqlmesh.core.plan.common import (
+    SnapshotIntervalClearRequest,
+    identify_restatement_intervals_across_snapshot_versions,
+)
 from sqlmesh.core.plan.definition import EvaluatablePlan, SnapshotIntervals
 from sqlmesh.core.plan import stages
 from sqlmesh.core.plan.evaluator import (
@@ -16,6 +24,8 @@ from sqlmesh.core.plan.evaluator import (
 from sqlmesh.core.state_sync import StateReader
 from sqlmesh.core.snapshot.definition import (
     SnapshotInfoMixin,
+    SnapshotIdAndVersion,
+    model_display_name,
 )
 from sqlmesh.utils import Verbosity, rich as srich, to_snake_case
 from sqlmesh.utils.date import to_ts
@@ -45,6 +55,15 @@ class PlanExplainer(PlanEvaluator):
         explainer_console = _get_explainer_console(
             self.console, plan.environment, self.default_catalog
         )
+        # add extra metadata that's only needed at this point for better --explain output
+        plan_stages = [
+            ExplainableRestatementStage.from_restatement_stage(stage, self.state_reader, plan)
+            if isinstance(stage, stages.RestatementStage)
+            else stage
+            for stage in plan_stages
+        ]
         explainer_console.explain(plan_stages)
@@ -54,6 +73,41 @@ class ExplainerConsole(abc.ABC):
         pass
+@dataclass
+class ExplainableRestatementStage(stages.RestatementStage):
+    """
+    This brings forward some calculations that would usually be done in the evaluator so the user can be given a better indication
+    of what might happen when they ask for the plan to be explained
+    """
+    snapshot_intervals_to_clear: t.Dict[str, t.List[SnapshotIntervalClearRequest]]
+    """Which snapshots from other environments would have intervals cleared as part of restatement, grouped by name."""
+    @classmethod
+    def from_restatement_stage(
+        cls: t.Type[ExplainableRestatementStage],
+        stage: stages.RestatementStage,
+        state_reader: StateReader,
+        plan: EvaluatablePlan,
+    ) -> ExplainableRestatementStage:
+        all_restatement_intervals = identify_restatement_intervals_across_snapshot_versions(
+            state_reader=state_reader,
+            prod_restatements=plan.restatements,
+            disable_restatement_models=plan.disabled_restatement_models,
+            loaded_snapshots={s.snapshot_id: s for s in stage.all_snapshots.values()},
+        )
+        # Group the interval clear requests by snapshot name to make them easier to write to the console
+        snapshot_intervals_to_clear = defaultdict(list)
+        for clear_request in all_restatement_intervals.values():
+            snapshot_intervals_to_clear[clear_request.snapshot.name].append(clear_request)
+        return cls(
+            snapshot_intervals_to_clear=snapshot_intervals_to_clear,
+            all_snapshots=stage.all_snapshots,
+        )
 MAX_TREE_LENGTH = 10
@@ -146,11 +200,37 @@ class RichExplainerConsole(ExplainerConsole):
             tree.add(display_name)
         return tree
-    def visit_restatement_stage(self, stage: stages.RestatementStage) -> Tree:
-        tree = Tree("[bold]Invalidate data intervals as part of restatement[/bold]")
-        for snapshot_table_info, interval in stage.snapshot_intervals.items():
-            display_name = self._display_name(snapshot_table_info)
-            tree.add(f"{display_name} [{to_ts(interval[0])} - {to_ts(interval[1])}]")
+    def visit_explainable_restatement_stage(self, stage: ExplainableRestatementStage) -> Tree:
+        return self.visit_restatement_stage(stage)
+    def visit_restatement_stage(
+        self, stage: t.Union[ExplainableRestatementStage, stages.RestatementStage]
+    ) -> Tree:
+        tree = Tree(
+            "[bold]Invalidate data intervals in state for development environments to prevent old data from being promoted[/bold]\n"
+            "This only affects state and will not clear physical data from the tables until the next plan for each environment"
+        )
+        if isinstance(stage, ExplainableRestatementStage) and (
+            snapshot_intervals := stage.snapshot_intervals_to_clear
+        ):
+            for name, clear_requests in snapshot_intervals.items():
+                display_name = model_display_name(
+                    name, self.environment_naming_info, self.default_catalog, self.dialect
+                )
+                interval_start = min(cr.interval[0] for cr in clear_requests)
+                interval_end = max(cr.interval[1] for cr in clear_requests)
+                if not interval_start or not interval_end:
+                    continue
+                node = tree.add(f"{display_name} [{to_ts(interval_start)} - {to_ts(interval_end)}]")
+                all_environment_names = sorted(
+                    set(env_name for cr in clear_requests for env_name in cr.environment_names)
+                )
+                node.add("in environments: " + ", ".join(all_environment_names))
         return tree
     def visit_backfill_stage(self, stage: stages.BackfillStage) -> Tree:
@@ -265,12 +345,14 @@ class RichExplainerConsole(ExplainerConsole):
     def _display_name(
         self,
-        snapshot: SnapshotInfoMixin,
+        snapshot: t.Union[SnapshotInfoMixin, SnapshotIdAndVersion],
         environment_naming_info: t.Optional[EnvironmentNamingInfo] = None,
     ) -> str:
         return snapshot.display_name(
-            environment_naming_info or self.environment_naming_info,
-            self.default_catalog if self.verbosity < Verbosity.VERY_VERBOSE else None,
+            environment_naming_info=environment_naming_info or self.environment_naming_info,
+            default_catalog=self.default_catalog
+            if self.verbosity < Verbosity.VERY_VERBOSE
+            else None,
             dialect=self.dialect,
         )

sqlmesh/core/plan/stages.py CHANGED Viewed

@@ -12,8 +12,9 @@ from sqlmesh.core.snapshot.definition import (
     Snapshot,
     SnapshotTableInfo,
     SnapshotId,
-    Interval,
+    snapshots_to_dag,
 )
+from sqlmesh.utils.errors import PlanError
 @dataclass
@@ -98,14 +99,19 @@ class AuditOnlyRunStage:
 @dataclass
 class RestatementStage:
-    """Restate intervals for given snapshots.
+    """Clear intervals from state for snapshots in *other* environments, when restatements are requested in prod.
+    This stage is effectively a "marker" stage to trigger the plan evaluator to perform the "clear intervals" logic after the BackfillStage has completed.
+    The "clear intervals" logic is executed just-in-time using the latest state available in order to pick up new snapshots that may have
+    been created while the BackfillStage was running, which is why we do not build a list of snapshots to clear at plan time and defer to evaluation time.
+    Note that this stage is only present on `prod` plans because dev plans do not need to worry about clearing intervals in other environments.
     Args:
-        snapshot_intervals: Intervals to restate.
-        all_snapshots: All snapshots in the plan by name.
+        all_snapshots: All snapshots in the plan by name. Note that this does not include the snapshots from other environments that will get their
+            intervals cleared, it's included here as an optimization to prevent having to re-fetch the current plan's snapshots
     """
-    snapshot_intervals: t.Dict[SnapshotTableInfo, Interval]
     all_snapshots: t.Dict[str, Snapshot]
@@ -244,6 +250,7 @@ class PlanStagesBuilder:
         stored_snapshots = self.state_reader.get_snapshots(plan.environment.snapshots)
         snapshots = {**new_snapshots, **stored_snapshots}
         snapshots_by_name = {s.name: s for s in snapshots.values()}
+        dag = snapshots_to_dag(snapshots.values())
         all_selected_for_backfill_snapshots = {
             s.snapshot_id for s in snapshots.values() if plan.is_selected_for_backfill(s.name)
@@ -261,14 +268,21 @@ class PlanStagesBuilder:
             before_promote_snapshots = {
                 s.snapshot_id
                 for s in snapshots.values()
-                if deployability_index.is_representative(s)
+                if (deployability_index.is_representative(s) or s.is_seed)
                 and plan.is_selected_for_backfill(s.name)
             }
             after_promote_snapshots = all_selected_for_backfill_snapshots - before_promote_snapshots
             deployability_index = DeployabilityIndex.all_deployable()
+            snapshot_ids_with_schema_migration = [
+                s.snapshot_id for s in snapshots.values() if s.requires_schema_migration_in_prod
+            ]
+            # Include all upstream dependencies of snapshots that require schema migration to make sure
+            # the upstream tables are created before the schema updates are applied
             snapshots_with_schema_migration = [
-                s for s in snapshots.values() if s.requires_schema_migration_in_prod
+                snapshots[s_id]
+                for s_id in dag.subdag(*snapshot_ids_with_schema_migration)
+                if snapshots[s_id].supports_schema_migration_in_prod
             ]
         snapshots_to_intervals = self._missing_intervals(
@@ -321,10 +335,6 @@ class PlanStagesBuilder:
         if audit_only_snapshots:
             stages.append(AuditOnlyRunStage(snapshots=list(audit_only_snapshots.values())))
-        restatement_stage = self._get_restatement_stage(plan, snapshots_by_name)
-        if restatement_stage:
-            stages.append(restatement_stage)
         if missing_intervals_before_promote:
             stages.append(
                 BackfillStage(
@@ -349,6 +359,15 @@ class PlanStagesBuilder:
                 )
             )
+        # Note: the "restatement stage" only clears intervals in state; it does not actually perform the restatements (that's the backfill stage).
+        # It needs to come *after* the backfill stage so that at no time do other plans / runs see empty prod intervals and compete with this plan to try to fill them.
+        # In addition, when we update intervals in state, we only clear intervals from dev snapshots, to force dev models to be backfilled based on the new prod data.
+        # We can leave prod intervals alone because, by the time this plan finishes, the intervals in state have not actually changed: restatement replaces
+        # data for existing intervals and does not produce new ones.
+        restatement_stage = self._get_restatement_stage(plan, snapshots_by_name)
+        if restatement_stage:
+            stages.append(restatement_stage)
         stages.append(
             EnvironmentRecordUpdateStage(
                 no_gaps_snapshot_names={s.name for s in before_promote_snapshots}
@@ -443,16 +462,18 @@ class PlanStagesBuilder:
     def _get_restatement_stage(
         self, plan: EvaluatablePlan, snapshots_by_name: t.Dict[str, Snapshot]
     ) -> t.Optional[RestatementStage]:
-        snapshot_intervals_to_restate = {}
-        for name, interval in plan.restatements.items():
-            restated_snapshot = snapshots_by_name[name]
-            restated_snapshot.remove_interval(interval)
-            snapshot_intervals_to_restate[restated_snapshot.table_info] = interval
-        if not snapshot_intervals_to_restate or plan.is_dev:
-            return None
-        return RestatementStage(
-            snapshot_intervals=snapshot_intervals_to_restate, all_snapshots=snapshots_by_name
-        )
+        if plan.restate_all_snapshots:
+            if plan.is_dev:
+                raise PlanError(
+                    "Clearing intervals from state across dev model versions is only valid for prod plans"
+                )
+            if plan.restatements:
+                return RestatementStage(
+                    all_snapshots=snapshots_by_name,
+                )
+        return None
     def _get_physical_layer_update_stage(
         self,

sqlmesh/core/renderer.py CHANGED Viewed

@@ -6,7 +6,7 @@ from contextlib import contextmanager
 from functools import partial
 from pathlib import Path
-from sqlglot import exp, parse
+from sqlglot import exp, Dialect
 from sqlglot.errors import SqlglotError
 from sqlglot.helper import ensure_list
 from sqlglot.optimizer.annotate_types import annotate_types
@@ -196,7 +196,14 @@ class BaseExpressionRenderer:
             **kwargs,
         }
+        if this_model:
+            render_kwargs["this_model"] = this_model
+        macro_evaluator.locals.update(render_kwargs)
         variables = kwargs.pop("variables", {})
+        if variables:
+            macro_evaluator.locals.setdefault(c.SQLMESH_VARS, {}).update(variables)
         expressions = [self._expression]
         if isinstance(self._expression, d.Jinja):
@@ -249,23 +256,24 @@ class BaseExpressionRenderer:
                 ) from ex
             if rendered_expression.strip():
-                try:
-                    expressions = [e for e in parse(rendered_expression, read=self._dialect) if e]
-                    if not expressions:
-                        raise ConfigError(f"Failed to parse an expression:\n{self._expression}")
-                except Exception as ex:
-                    raise ConfigError(
-                        f"Could not parse the rendered jinja at '{self._path}'.\n{ex}"
-                    ) from ex
-        if this_model:
-            render_kwargs["this_model"] = this_model
-        macro_evaluator.locals.update(render_kwargs)
-        if variables:
-            macro_evaluator.locals.setdefault(c.SQLMESH_VARS, {}).update(variables)
+                # ensure there is actual SQL and not just comments and non-SQL jinja
+                dialect = Dialect.get_or_raise(self._dialect)
+                tokens = dialect.tokenize(rendered_expression)
+                if tokens:
+                    try:
+                        expressions = [
+                            e for e in dialect.parser().parse(tokens, rendered_expression) if e
+                        ]
+                        if not expressions:
+                            raise ConfigError(
+                                f"Failed to parse an expression:\n{rendered_expression}"
+                            )
+                    except Exception as ex:
+                        raise ConfigError(
+                            f"Could not parse the rendered jinja at '{self._path}'.\n{ex}"
+                        ) from ex
         for definition in self._macro_definitions:
             try:

sqlmesh 0.217.1.dev1__py3-none-any.whl → 0.227.2.dev4__py3-none-any.whl

sqlmesh 0.217.1.dev1__py3-none-any.whl → 0.227.2.dev4__py3-none-any.whl