sqlmesh 0.217.1.dev1__py3-none-any.whl → 0.227.2.dev20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlmesh/__init__.py +12 -2
- sqlmesh/_version.py +2 -2
- sqlmesh/cli/project_init.py +10 -2
- sqlmesh/core/_typing.py +1 -0
- sqlmesh/core/audit/definition.py +8 -2
- sqlmesh/core/config/__init__.py +1 -1
- sqlmesh/core/config/connection.py +20 -5
- sqlmesh/core/config/dbt.py +13 -0
- sqlmesh/core/config/janitor.py +12 -0
- sqlmesh/core/config/loader.py +7 -0
- sqlmesh/core/config/model.py +2 -0
- sqlmesh/core/config/root.py +3 -0
- sqlmesh/core/console.py +80 -2
- sqlmesh/core/constants.py +1 -1
- sqlmesh/core/context.py +112 -35
- sqlmesh/core/dialect.py +3 -0
- sqlmesh/core/engine_adapter/_typing.py +2 -0
- sqlmesh/core/engine_adapter/base.py +330 -23
- sqlmesh/core/engine_adapter/base_postgres.py +17 -1
- sqlmesh/core/engine_adapter/bigquery.py +146 -7
- sqlmesh/core/engine_adapter/clickhouse.py +17 -13
- sqlmesh/core/engine_adapter/databricks.py +50 -2
- sqlmesh/core/engine_adapter/fabric.py +110 -29
- sqlmesh/core/engine_adapter/mixins.py +142 -48
- sqlmesh/core/engine_adapter/mssql.py +15 -4
- sqlmesh/core/engine_adapter/mysql.py +2 -2
- sqlmesh/core/engine_adapter/postgres.py +9 -3
- sqlmesh/core/engine_adapter/redshift.py +4 -0
- sqlmesh/core/engine_adapter/risingwave.py +1 -0
- sqlmesh/core/engine_adapter/shared.py +6 -0
- sqlmesh/core/engine_adapter/snowflake.py +82 -11
- sqlmesh/core/engine_adapter/spark.py +14 -10
- sqlmesh/core/engine_adapter/trino.py +5 -2
- sqlmesh/core/janitor.py +181 -0
- sqlmesh/core/lineage.py +1 -0
- sqlmesh/core/linter/rules/builtin.py +15 -0
- sqlmesh/core/loader.py +17 -30
- sqlmesh/core/macros.py +35 -13
- sqlmesh/core/model/common.py +2 -0
- sqlmesh/core/model/definition.py +72 -4
- sqlmesh/core/model/kind.py +66 -2
- sqlmesh/core/model/meta.py +107 -2
- sqlmesh/core/node.py +101 -2
- sqlmesh/core/plan/builder.py +15 -10
- sqlmesh/core/plan/common.py +196 -2
- sqlmesh/core/plan/definition.py +21 -6
- sqlmesh/core/plan/evaluator.py +72 -113
- sqlmesh/core/plan/explainer.py +90 -8
- sqlmesh/core/plan/stages.py +42 -21
- sqlmesh/core/renderer.py +26 -18
- sqlmesh/core/scheduler.py +60 -19
- sqlmesh/core/selector.py +137 -9
- sqlmesh/core/signal.py +64 -1
- sqlmesh/core/snapshot/__init__.py +1 -0
- sqlmesh/core/snapshot/definition.py +109 -25
- sqlmesh/core/snapshot/evaluator.py +610 -50
- sqlmesh/core/state_sync/__init__.py +0 -1
- sqlmesh/core/state_sync/base.py +31 -27
- sqlmesh/core/state_sync/cache.py +12 -4
- sqlmesh/core/state_sync/common.py +216 -111
- sqlmesh/core/state_sync/db/facade.py +30 -15
- sqlmesh/core/state_sync/db/interval.py +27 -7
- sqlmesh/core/state_sync/db/migrator.py +14 -8
- sqlmesh/core/state_sync/db/snapshot.py +119 -87
- sqlmesh/core/table_diff.py +2 -2
- sqlmesh/core/test/definition.py +14 -9
- sqlmesh/core/test/discovery.py +4 -0
- sqlmesh/dbt/adapter.py +20 -11
- sqlmesh/dbt/basemodel.py +52 -41
- sqlmesh/dbt/builtin.py +27 -11
- sqlmesh/dbt/column.py +17 -5
- sqlmesh/dbt/common.py +4 -2
- sqlmesh/dbt/context.py +14 -1
- sqlmesh/dbt/loader.py +60 -8
- sqlmesh/dbt/manifest.py +136 -8
- sqlmesh/dbt/model.py +105 -25
- sqlmesh/dbt/package.py +16 -1
- sqlmesh/dbt/profile.py +3 -3
- sqlmesh/dbt/project.py +12 -7
- sqlmesh/dbt/seed.py +1 -1
- sqlmesh/dbt/source.py +6 -1
- sqlmesh/dbt/target.py +25 -6
- sqlmesh/dbt/test.py +31 -1
- sqlmesh/integrations/github/cicd/controller.py +6 -2
- sqlmesh/lsp/context.py +4 -2
- sqlmesh/magics.py +1 -1
- sqlmesh/migrations/v0000_baseline.py +3 -6
- sqlmesh/migrations/v0061_mysql_fix_blob_text_type.py +2 -5
- sqlmesh/migrations/v0062_add_model_gateway.py +2 -2
- sqlmesh/migrations/v0063_change_signals.py +2 -4
- sqlmesh/migrations/v0064_join_when_matched_strings.py +2 -4
- sqlmesh/migrations/v0065_add_model_optimize.py +2 -2
- sqlmesh/migrations/v0066_add_auto_restatements.py +2 -6
- sqlmesh/migrations/v0067_add_tsql_date_full_precision.py +2 -2
- sqlmesh/migrations/v0068_include_unrendered_query_in_metadata_hash.py +2 -2
- sqlmesh/migrations/v0069_update_dev_table_suffix.py +2 -4
- sqlmesh/migrations/v0070_include_grains_in_metadata_hash.py +2 -2
- sqlmesh/migrations/v0071_add_dev_version_to_intervals.py +2 -6
- sqlmesh/migrations/v0072_add_environment_statements.py +2 -4
- sqlmesh/migrations/v0073_remove_symbolic_disable_restatement.py +2 -4
- sqlmesh/migrations/v0074_add_partition_by_time_column_property.py +2 -2
- sqlmesh/migrations/v0075_remove_validate_query.py +2 -4
- sqlmesh/migrations/v0076_add_cron_tz.py +2 -2
- sqlmesh/migrations/v0077_fix_column_type_hash_calculation.py +2 -2
- sqlmesh/migrations/v0078_warn_if_non_migratable_python_env.py +2 -4
- sqlmesh/migrations/v0079_add_gateway_managed_property.py +7 -9
- sqlmesh/migrations/v0080_add_batch_size_to_scd_type_2_models.py +2 -2
- sqlmesh/migrations/v0081_update_partitioned_by.py +2 -4
- sqlmesh/migrations/v0082_warn_if_incorrectly_duplicated_statements.py +2 -4
- sqlmesh/migrations/v0083_use_sql_for_scd_time_data_type_data_hash.py +2 -2
- sqlmesh/migrations/v0084_normalize_quote_when_matched_and_merge_filter.py +2 -2
- sqlmesh/migrations/v0085_deterministic_repr.py +2 -4
- sqlmesh/migrations/v0086_check_deterministic_bug.py +2 -4
- sqlmesh/migrations/v0087_normalize_blueprint_variables.py +2 -4
- sqlmesh/migrations/v0088_warn_about_variable_python_env_diffs.py +2 -4
- sqlmesh/migrations/v0089_add_virtual_environment_mode.py +2 -2
- sqlmesh/migrations/v0090_add_forward_only_column.py +2 -6
- sqlmesh/migrations/v0091_on_additive_change.py +2 -2
- sqlmesh/migrations/v0092_warn_about_dbt_data_type_diff.py +2 -4
- sqlmesh/migrations/v0093_use_raw_sql_in_fingerprint.py +2 -2
- sqlmesh/migrations/v0094_add_dev_version_and_fingerprint_columns.py +2 -6
- sqlmesh/migrations/v0095_warn_about_dbt_raw_sql_diff.py +2 -4
- sqlmesh/migrations/v0096_remove_plan_dags_table.py +2 -4
- sqlmesh/migrations/v0097_add_dbt_name_in_node.py +2 -2
- sqlmesh/migrations/v0098_add_dbt_node_info_in_node.py +103 -0
- sqlmesh/migrations/v0099_add_last_altered_to_intervals.py +25 -0
- sqlmesh/migrations/v0100_add_grants_and_grants_target_layer.py +9 -0
- sqlmesh/utils/__init__.py +8 -1
- sqlmesh/utils/cache.py +5 -1
- sqlmesh/utils/date.py +1 -1
- sqlmesh/utils/errors.py +4 -0
- sqlmesh/utils/git.py +3 -1
- sqlmesh/utils/jinja.py +25 -2
- sqlmesh/utils/pydantic.py +6 -6
- sqlmesh/utils/windows.py +13 -3
- {sqlmesh-0.217.1.dev1.dist-info → sqlmesh-0.227.2.dev20.dist-info}/METADATA +5 -5
- {sqlmesh-0.217.1.dev1.dist-info → sqlmesh-0.227.2.dev20.dist-info}/RECORD +188 -183
- sqlmesh_dbt/cli.py +70 -7
- sqlmesh_dbt/console.py +14 -6
- sqlmesh_dbt/operations.py +103 -24
- sqlmesh_dbt/selectors.py +39 -1
- web/client/dist/assets/{Audits-Ucsx1GzF.js → Audits-CBiYyyx-.js} +1 -1
- web/client/dist/assets/{Banner-BWDzvavM.js → Banner-DSRbUlO5.js} +1 -1
- web/client/dist/assets/{ChevronDownIcon-D2VL13Ah.js → ChevronDownIcon-MK_nrjD_.js} +1 -1
- web/client/dist/assets/{ChevronRightIcon-DWGYbf1l.js → ChevronRightIcon-CLWtT22Q.js} +1 -1
- web/client/dist/assets/{Content-DdHDZM3I.js → Content-BNuGZN5l.js} +1 -1
- web/client/dist/assets/{Content-Bikfy8fh.js → Content-CSHJyW0n.js} +1 -1
- web/client/dist/assets/{Data-CzAJH7rW.js → Data-C1oRDbLx.js} +1 -1
- web/client/dist/assets/{DataCatalog-BJF11g8f.js → DataCatalog-HXyX2-_j.js} +1 -1
- web/client/dist/assets/{Editor-s0SBpV2y.js → Editor-BDyfpUuw.js} +1 -1
- web/client/dist/assets/{Editor-DgLhgKnm.js → Editor-D0jNItwC.js} +1 -1
- web/client/dist/assets/{Errors-D0m0O1d3.js → Errors-BfuFLcPi.js} +1 -1
- web/client/dist/assets/{FileExplorer-CEv0vXkt.js → FileExplorer-BR9IE3he.js} +1 -1
- web/client/dist/assets/{Footer-BwzXn8Ew.js → Footer-CgBEtiAh.js} +1 -1
- web/client/dist/assets/{Header-6heDkEqG.js → Header-DSqR6nSO.js} +1 -1
- web/client/dist/assets/{Input-obuJsD6k.js → Input-B-oZ6fGO.js} +1 -1
- web/client/dist/assets/Lineage-DYQVwDbD.js +1 -0
- web/client/dist/assets/{ListboxShow-HM9_qyrt.js → ListboxShow-BE5-xevs.js} +1 -1
- web/client/dist/assets/{ModelLineage-zWdKo0U2.js → ModelLineage-DkIFAYo4.js} +1 -1
- web/client/dist/assets/{Models-Bcu66SRz.js → Models-D5dWr8RB.js} +1 -1
- web/client/dist/assets/{Page-BWEEQfIt.js → Page-C-XfU5BR.js} +1 -1
- web/client/dist/assets/{Plan-C4gXCqlf.js → Plan-ZEuTINBq.js} +1 -1
- web/client/dist/assets/{PlusCircleIcon-CVDO651q.js → PlusCircleIcon-DVXAHG8_.js} +1 -1
- web/client/dist/assets/{ReportErrors-BT6xFwAr.js → ReportErrors-B7FEPzMB.js} +1 -1
- web/client/dist/assets/{Root-ryJoBK4h.js → Root-8aZyhPxF.js} +1 -1
- web/client/dist/assets/{SearchList-DB04sPb9.js → SearchList-W_iT2G82.js} +1 -1
- web/client/dist/assets/{SelectEnvironment-CUYcXUu6.js → SelectEnvironment-C65jALmO.js} +1 -1
- web/client/dist/assets/{SourceList-Doo_9ZGp.js → SourceList-DSLO6nVJ.js} +1 -1
- web/client/dist/assets/{SourceListItem-D5Mj7Dly.js → SourceListItem-BHt8d9-I.js} +1 -1
- web/client/dist/assets/{SplitPane-qHmkD1qy.js → SplitPane-CViaZmw6.js} +1 -1
- web/client/dist/assets/{Tests-DH1Z74ML.js → Tests-DhaVt5t1.js} +1 -1
- web/client/dist/assets/{Welcome-DqUJUNMF.js → Welcome-DvpjH-_4.js} +1 -1
- web/client/dist/assets/context-BctCsyGb.js +71 -0
- web/client/dist/assets/{context-Dr54UHLi.js → context-DFNeGsFF.js} +1 -1
- web/client/dist/assets/{editor-DYIP1yQ4.js → editor-CcO28cqd.js} +1 -1
- web/client/dist/assets/{file-DarlIDVi.js → file-CvJN3aZO.js} +1 -1
- web/client/dist/assets/{floating-ui.react-dom-BH3TFvkM.js → floating-ui.react-dom-CjE-JNW1.js} +1 -1
- web/client/dist/assets/{help-Bl8wqaQc.js → help-DuPhjipa.js} +1 -1
- web/client/dist/assets/{index-D1sR7wpN.js → index-C-dJH7yZ.js} +1 -1
- web/client/dist/assets/{index-O3mjYpnE.js → index-Dj0i1-CA.js} +2 -2
- web/client/dist/assets/{plan-CehRrJUG.js → plan-BTRSbjKn.js} +1 -1
- web/client/dist/assets/{popover-CqgMRE0G.js → popover-_Sf0yvOI.js} +1 -1
- web/client/dist/assets/{project-6gxepOhm.js → project-BvSOI8MY.js} +1 -1
- web/client/dist/index.html +1 -1
- web/client/dist/assets/Lineage-D0Hgdz2v.js +0 -1
- web/client/dist/assets/context-DgX0fp2E.js +0 -68
- {sqlmesh-0.217.1.dev1.dist-info → sqlmesh-0.227.2.dev20.dist-info}/WHEEL +0 -0
- {sqlmesh-0.217.1.dev1.dist-info → sqlmesh-0.227.2.dev20.dist-info}/entry_points.txt +0 -0
- {sqlmesh-0.217.1.dev1.dist-info → sqlmesh-0.227.2.dev20.dist-info}/licenses/LICENSE +0 -0
- {sqlmesh-0.217.1.dev1.dist-info → sqlmesh-0.227.2.dev20.dist-info}/top_level.txt +0 -0
sqlmesh/core/plan/common.py
CHANGED
|
@@ -1,19 +1,40 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
+
import typing as t
|
|
3
|
+
import logging
|
|
4
|
+
from dataclasses import dataclass, field
|
|
2
5
|
|
|
3
|
-
from sqlmesh.core.
|
|
6
|
+
from sqlmesh.core.state_sync import StateReader
|
|
7
|
+
from sqlmesh.core.snapshot import Snapshot, SnapshotId, SnapshotIdAndVersion, SnapshotNameVersion
|
|
8
|
+
from sqlmesh.core.snapshot.definition import Interval
|
|
9
|
+
from sqlmesh.utils.dag import DAG
|
|
10
|
+
from sqlmesh.utils.date import now_timestamp
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
4
13
|
|
|
5
14
|
|
|
6
15
|
def should_force_rebuild(old: Snapshot, new: Snapshot) -> bool:
|
|
7
16
|
if new.is_view and new.is_indirect_non_breaking and not new.is_forward_only:
|
|
8
17
|
# View models always need to be rebuilt to reflect updated upstream dependencies
|
|
9
18
|
return True
|
|
10
|
-
if new.is_seed
|
|
19
|
+
if new.is_seed and not (
|
|
20
|
+
new.is_metadata
|
|
21
|
+
and new.previous_version
|
|
22
|
+
and new.previous_version.snapshot_id(new.name) == old.snapshot_id
|
|
23
|
+
):
|
|
11
24
|
# Seed models always need to be rebuilt to reflect changes in the seed file
|
|
25
|
+
# Unless only their metadata has been updated (eg description added) and the seed file has not been touched
|
|
12
26
|
return True
|
|
13
27
|
return is_breaking_kind_change(old, new)
|
|
14
28
|
|
|
15
29
|
|
|
16
30
|
def is_breaking_kind_change(old: Snapshot, new: Snapshot) -> bool:
|
|
31
|
+
if new.is_model != old.is_model:
|
|
32
|
+
# If one is a model and the other isn't, then we need to rebuild
|
|
33
|
+
return True
|
|
34
|
+
if not new.is_model or not old.is_model:
|
|
35
|
+
# If neither are models, then we don't need to rebuild
|
|
36
|
+
# Note that the remaining checks only apply to model snapshots
|
|
37
|
+
return False
|
|
17
38
|
if old.virtual_environment_mode != new.virtual_environment_mode:
|
|
18
39
|
# If the virtual environment mode has changed, then we need to rebuild
|
|
19
40
|
return True
|
|
@@ -27,3 +48,176 @@ def is_breaking_kind_change(old: Snapshot, new: Snapshot) -> bool:
|
|
|
27
48
|
# If the partitioning hasn't changed, then we don't need to rebuild
|
|
28
49
|
return False
|
|
29
50
|
return True
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@dataclass
|
|
54
|
+
class SnapshotIntervalClearRequest:
|
|
55
|
+
# affected snapshot
|
|
56
|
+
snapshot: SnapshotIdAndVersion
|
|
57
|
+
|
|
58
|
+
# which interval to clear
|
|
59
|
+
interval: Interval
|
|
60
|
+
|
|
61
|
+
# which environments this snapshot is currently promoted
|
|
62
|
+
# note that this can be empty if the snapshot exists because its ttl has not expired
|
|
63
|
+
# but it is not part of any particular environment
|
|
64
|
+
environment_names: t.Set[str] = field(default_factory=set)
|
|
65
|
+
|
|
66
|
+
@property
|
|
67
|
+
def snapshot_id(self) -> SnapshotId:
|
|
68
|
+
return self.snapshot.snapshot_id
|
|
69
|
+
|
|
70
|
+
@property
|
|
71
|
+
def sorted_environment_names(self) -> t.List[str]:
|
|
72
|
+
return list(sorted(self.environment_names))
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def identify_restatement_intervals_across_snapshot_versions(
|
|
76
|
+
state_reader: StateReader,
|
|
77
|
+
prod_restatements: t.Dict[str, Interval],
|
|
78
|
+
disable_restatement_models: t.Set[str],
|
|
79
|
+
loaded_snapshots: t.Dict[SnapshotId, Snapshot],
|
|
80
|
+
current_ts: t.Optional[int] = None,
|
|
81
|
+
) -> t.Dict[SnapshotId, SnapshotIntervalClearRequest]:
|
|
82
|
+
"""
|
|
83
|
+
Given a map of snapshot names + intervals to restate in prod:
|
|
84
|
+
- Look up matching snapshots (match based on name - regardless of version, to get all versions)
|
|
85
|
+
- For each match, also match downstream snapshots in each dev environment while filtering out models that have restatement disabled
|
|
86
|
+
- Return a list of all snapshots that are affected + the interval that needs to be cleared for each
|
|
87
|
+
|
|
88
|
+
The goal here is to produce a list of intervals to invalidate across all dev snapshots so that a subsequent plan or
|
|
89
|
+
cadence run in those environments causes the intervals to be repopulated.
|
|
90
|
+
"""
|
|
91
|
+
if not prod_restatements:
|
|
92
|
+
return {}
|
|
93
|
+
|
|
94
|
+
# Although :loaded_snapshots is sourced from RestatementStage.all_snapshots, since the only time we ever need
|
|
95
|
+
# to clear intervals across all environments is for prod, the :loaded_snapshots here are always from prod
|
|
96
|
+
prod_name_versions: t.Set[SnapshotNameVersion] = {
|
|
97
|
+
s.name_version for s in loaded_snapshots.values()
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
snapshot_intervals_to_clear: t.Dict[SnapshotId, SnapshotIntervalClearRequest] = {}
|
|
101
|
+
|
|
102
|
+
for env_summary in state_reader.get_environments_summary():
|
|
103
|
+
# Fetch the full environment object one at a time to avoid loading all environments into memory at once
|
|
104
|
+
env = state_reader.get_environment(env_summary.name)
|
|
105
|
+
if not env:
|
|
106
|
+
logger.warning("Environment %s not found", env_summary.name)
|
|
107
|
+
continue
|
|
108
|
+
|
|
109
|
+
snapshots_by_name = {s.name: s.table_info for s in env.snapshots}
|
|
110
|
+
|
|
111
|
+
# We dont just restate matching snapshots, we also have to restate anything downstream of them
|
|
112
|
+
# so that if A gets restated in prod and dev has A <- B <- C, B and C get restated in dev
|
|
113
|
+
env_dag = DAG({s.name: {p.name for p in s.parents} for s in env.snapshots})
|
|
114
|
+
|
|
115
|
+
for restate_snapshot_name, interval in prod_restatements.items():
|
|
116
|
+
if restate_snapshot_name not in snapshots_by_name:
|
|
117
|
+
# snapshot is not promoted in this environment
|
|
118
|
+
continue
|
|
119
|
+
|
|
120
|
+
affected_snapshot_names = [
|
|
121
|
+
x
|
|
122
|
+
for x in ([restate_snapshot_name] + env_dag.downstream(restate_snapshot_name))
|
|
123
|
+
if x not in disable_restatement_models
|
|
124
|
+
]
|
|
125
|
+
|
|
126
|
+
for affected_snapshot_name in affected_snapshot_names:
|
|
127
|
+
affected_snapshot = snapshots_by_name[affected_snapshot_name]
|
|
128
|
+
|
|
129
|
+
# Don't clear intervals for a dev snapshot if it shares the same physical version with prod.
|
|
130
|
+
# Otherwise, prod will be affected by what should be a dev operation
|
|
131
|
+
if affected_snapshot.name_version in prod_name_versions:
|
|
132
|
+
continue
|
|
133
|
+
|
|
134
|
+
clear_request = snapshot_intervals_to_clear.get(affected_snapshot.snapshot_id)
|
|
135
|
+
if not clear_request:
|
|
136
|
+
clear_request = SnapshotIntervalClearRequest(
|
|
137
|
+
snapshot=affected_snapshot.id_and_version, interval=interval
|
|
138
|
+
)
|
|
139
|
+
snapshot_intervals_to_clear[affected_snapshot.snapshot_id] = clear_request
|
|
140
|
+
|
|
141
|
+
clear_request.environment_names |= set([env.name])
|
|
142
|
+
|
|
143
|
+
# snapshot_intervals_to_clear now contains the entire hierarchy of affected snapshots based
|
|
144
|
+
# on building the DAG for each environment and including downstream snapshots
|
|
145
|
+
# but, what if there are affected snapshots that arent part of any environment?
|
|
146
|
+
unique_snapshot_names = set(snapshot_id.name for snapshot_id in snapshot_intervals_to_clear)
|
|
147
|
+
|
|
148
|
+
current_ts = current_ts or now_timestamp()
|
|
149
|
+
all_matching_non_prod_snapshots = {
|
|
150
|
+
s.snapshot_id: s
|
|
151
|
+
for s in state_reader.get_snapshots_by_names(
|
|
152
|
+
snapshot_names=unique_snapshot_names, current_ts=current_ts, exclude_expired=True
|
|
153
|
+
)
|
|
154
|
+
# Don't clear intervals for a snapshot if it shares the same physical version with prod.
|
|
155
|
+
# Otherwise, prod will be affected by what should be a dev operation
|
|
156
|
+
if s.name_version not in prod_name_versions
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
# identify the ones that we havent picked up yet, which are the ones that dont exist in any environment
|
|
160
|
+
if remaining_snapshot_ids := set(all_matching_non_prod_snapshots).difference(
|
|
161
|
+
snapshot_intervals_to_clear
|
|
162
|
+
):
|
|
163
|
+
# these snapshot id's exist in isolation and may be related to a downstream dependency of the :prod_restatements,
|
|
164
|
+
# rather than directly related, so we can't simply look up the interval to clear based on :prod_restatements.
|
|
165
|
+
# To figure out the interval that should be cleared, we can match to the existing list based on name
|
|
166
|
+
# and conservatively take the widest interval that shows up
|
|
167
|
+
snapshot_name_to_widest_interval: t.Dict[str, Interval] = {}
|
|
168
|
+
for s_id, clear_request in snapshot_intervals_to_clear.items():
|
|
169
|
+
current_start, current_end = snapshot_name_to_widest_interval.get(
|
|
170
|
+
s_id.name, clear_request.interval
|
|
171
|
+
)
|
|
172
|
+
next_start, next_end = clear_request.interval
|
|
173
|
+
|
|
174
|
+
next_start = min(current_start, next_start)
|
|
175
|
+
next_end = max(current_end, next_end)
|
|
176
|
+
|
|
177
|
+
snapshot_name_to_widest_interval[s_id.name] = (next_start, next_end)
|
|
178
|
+
|
|
179
|
+
for remaining_snapshot_id in remaining_snapshot_ids:
|
|
180
|
+
remaining_snapshot = all_matching_non_prod_snapshots[remaining_snapshot_id]
|
|
181
|
+
snapshot_intervals_to_clear[remaining_snapshot_id] = SnapshotIntervalClearRequest(
|
|
182
|
+
snapshot=remaining_snapshot,
|
|
183
|
+
interval=snapshot_name_to_widest_interval[remaining_snapshot_id.name],
|
|
184
|
+
)
|
|
185
|
+
|
|
186
|
+
# for any affected full_history_restatement_only snapshots, we need to widen the intervals being restated to
|
|
187
|
+
# include the whole time range for that snapshot. This requires a call to state to load the full snapshot record,
|
|
188
|
+
# so we only do it if necessary
|
|
189
|
+
full_history_restatement_snapshot_ids = [
|
|
190
|
+
# FIXME: full_history_restatement_only is just one indicator that the snapshot can only be fully refreshed, the other one is Model.depends_on_self
|
|
191
|
+
# however, to figure out depends_on_self, we have to render all the model queries which, alongside having to fetch full snapshots from state,
|
|
192
|
+
# is problematic in secure environments that are deliberately isolated from arbitrary user code (since rendering a query may require user macros to be present)
|
|
193
|
+
# So for now, these are not considered
|
|
194
|
+
s_id
|
|
195
|
+
for s_id, s in snapshot_intervals_to_clear.items()
|
|
196
|
+
if s.snapshot.full_history_restatement_only
|
|
197
|
+
]
|
|
198
|
+
if full_history_restatement_snapshot_ids:
|
|
199
|
+
# only load full snapshot records that we havent already loaded
|
|
200
|
+
additional_snapshots = state_reader.get_snapshots(
|
|
201
|
+
[
|
|
202
|
+
s.snapshot_id
|
|
203
|
+
for s in full_history_restatement_snapshot_ids
|
|
204
|
+
if s.snapshot_id not in loaded_snapshots
|
|
205
|
+
]
|
|
206
|
+
)
|
|
207
|
+
|
|
208
|
+
all_snapshots = loaded_snapshots | additional_snapshots
|
|
209
|
+
|
|
210
|
+
for full_snapshot_id in full_history_restatement_snapshot_ids:
|
|
211
|
+
full_snapshot = all_snapshots[full_snapshot_id]
|
|
212
|
+
intervals_to_clear = snapshot_intervals_to_clear[full_snapshot_id]
|
|
213
|
+
|
|
214
|
+
original_start, original_end = intervals_to_clear.interval
|
|
215
|
+
|
|
216
|
+
# get_removal_interval() widens intervals if necessary
|
|
217
|
+
new_interval = full_snapshot.get_removal_interval(
|
|
218
|
+
start=original_start, end=original_end
|
|
219
|
+
)
|
|
220
|
+
|
|
221
|
+
intervals_to_clear.interval = new_interval
|
|
222
|
+
|
|
223
|
+
return snapshot_intervals_to_clear
|
sqlmesh/core/plan/definition.py
CHANGED
|
@@ -58,7 +58,18 @@ class Plan(PydanticModel, frozen=True):
|
|
|
58
58
|
indirectly_modified: t.Dict[SnapshotId, t.Set[SnapshotId]]
|
|
59
59
|
|
|
60
60
|
deployability_index: DeployabilityIndex
|
|
61
|
+
selected_models_to_restate: t.Optional[t.Set[str]] = None
|
|
62
|
+
"""Models that have been explicitly selected for restatement by a user"""
|
|
61
63
|
restatements: t.Dict[SnapshotId, Interval]
|
|
64
|
+
"""
|
|
65
|
+
All models being restated, which are typically the explicitly selected ones + their downstream dependencies.
|
|
66
|
+
|
|
67
|
+
Note that dev previews are also considered restatements, so :selected_models_to_restate can be empty
|
|
68
|
+
while :restatements is still populated with dev previews
|
|
69
|
+
"""
|
|
70
|
+
restate_all_snapshots: bool
|
|
71
|
+
"""Whether or not to clear intervals from state for other versions of the models listed in :restatements"""
|
|
72
|
+
|
|
62
73
|
start_override_per_model: t.Optional[t.Dict[str, datetime]]
|
|
63
74
|
end_override_per_model: t.Optional[t.Dict[str, datetime]]
|
|
64
75
|
|
|
@@ -202,8 +213,8 @@ class Plan(PydanticModel, frozen=True):
|
|
|
202
213
|
|
|
203
214
|
snapshots_by_name = self.context_diff.snapshots_by_name
|
|
204
215
|
snapshots = [s.table_info for s in self.snapshots.values()]
|
|
205
|
-
|
|
206
|
-
if self.is_dev
|
|
216
|
+
promotable_snapshot_ids = None
|
|
217
|
+
if self.is_dev:
|
|
207
218
|
if self.selected_models_to_backfill is not None:
|
|
208
219
|
# Only promote models that have been explicitly selected for backfill.
|
|
209
220
|
promotable_snapshot_ids = {
|
|
@@ -214,12 +225,14 @@ class Plan(PydanticModel, frozen=True):
|
|
|
214
225
|
if m in snapshots_by_name
|
|
215
226
|
],
|
|
216
227
|
}
|
|
217
|
-
|
|
228
|
+
elif not self.include_unmodified:
|
|
218
229
|
promotable_snapshot_ids = self.context_diff.promotable_snapshot_ids.copy()
|
|
219
230
|
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
231
|
+
promoted_snapshot_ids = (
|
|
232
|
+
[s.snapshot_id for s in snapshots if s.snapshot_id in promotable_snapshot_ids]
|
|
233
|
+
if promotable_snapshot_ids is not None
|
|
234
|
+
else None
|
|
235
|
+
)
|
|
223
236
|
|
|
224
237
|
previous_finalized_snapshots = (
|
|
225
238
|
self.context_diff.environment_snapshots
|
|
@@ -259,6 +272,7 @@ class Plan(PydanticModel, frozen=True):
|
|
|
259
272
|
skip_backfill=self.skip_backfill,
|
|
260
273
|
empty_backfill=self.empty_backfill,
|
|
261
274
|
restatements={s.name: i for s, i in self.restatements.items()},
|
|
275
|
+
restate_all_snapshots=self.restate_all_snapshots,
|
|
262
276
|
is_dev=self.is_dev,
|
|
263
277
|
allow_destructive_models=self.allow_destructive_models,
|
|
264
278
|
allow_additive_models=self.allow_additive_models,
|
|
@@ -303,6 +317,7 @@ class EvaluatablePlan(PydanticModel):
|
|
|
303
317
|
skip_backfill: bool
|
|
304
318
|
empty_backfill: bool
|
|
305
319
|
restatements: t.Dict[str, Interval]
|
|
320
|
+
restate_all_snapshots: bool
|
|
306
321
|
is_dev: bool
|
|
307
322
|
allow_destructive_models: t.Set[str]
|
|
308
323
|
allow_additive_models: t.Set[str]
|
sqlmesh/core/plan/evaluator.py
CHANGED
|
@@ -22,7 +22,7 @@ from sqlmesh.core import constants as c
|
|
|
22
22
|
from sqlmesh.core.console import Console, get_console
|
|
23
23
|
from sqlmesh.core.environment import EnvironmentNamingInfo, execute_environment_statements
|
|
24
24
|
from sqlmesh.core.macros import RuntimeStage
|
|
25
|
-
from sqlmesh.core.snapshot.definition import
|
|
25
|
+
from sqlmesh.core.snapshot.definition import to_view_mapping, SnapshotTableInfo
|
|
26
26
|
from sqlmesh.core.plan import stages
|
|
27
27
|
from sqlmesh.core.plan.definition import EvaluatablePlan
|
|
28
28
|
from sqlmesh.core.scheduler import Scheduler
|
|
@@ -33,17 +33,15 @@ from sqlmesh.core.snapshot import (
|
|
|
33
33
|
SnapshotIntervals,
|
|
34
34
|
SnapshotId,
|
|
35
35
|
SnapshotInfoLike,
|
|
36
|
-
SnapshotTableInfo,
|
|
37
36
|
SnapshotCreationFailedError,
|
|
38
|
-
SnapshotNameVersion,
|
|
39
37
|
)
|
|
40
38
|
from sqlmesh.utils import to_snake_case
|
|
41
39
|
from sqlmesh.core.state_sync import StateSync
|
|
40
|
+
from sqlmesh.core.plan.common import identify_restatement_intervals_across_snapshot_versions
|
|
42
41
|
from sqlmesh.utils import CorrelationId
|
|
43
42
|
from sqlmesh.utils.concurrency import NodeExecutionFailedError
|
|
44
|
-
from sqlmesh.utils.errors import PlanError, SQLMeshError
|
|
45
|
-
from sqlmesh.utils.
|
|
46
|
-
from sqlmesh.utils.date import now
|
|
43
|
+
from sqlmesh.utils.errors import PlanError, ConflictingPlanError, SQLMeshError
|
|
44
|
+
from sqlmesh.utils.date import now, to_timestamp
|
|
47
45
|
|
|
48
46
|
logger = logging.getLogger(__name__)
|
|
49
47
|
|
|
@@ -260,6 +258,7 @@ class BuiltInPlanEvaluator(PlanEvaluator):
|
|
|
260
258
|
allow_additive_snapshots=plan.allow_additive_models,
|
|
261
259
|
selected_snapshot_ids=stage.selected_snapshot_ids,
|
|
262
260
|
selected_models=plan.selected_models,
|
|
261
|
+
is_restatement=bool(plan.restatements),
|
|
263
262
|
)
|
|
264
263
|
if errors:
|
|
265
264
|
raise PlanError("Plan application failed.")
|
|
@@ -289,27 +288,78 @@ class BuiltInPlanEvaluator(PlanEvaluator):
|
|
|
289
288
|
def visit_restatement_stage(
|
|
290
289
|
self, stage: stages.RestatementStage, plan: EvaluatablePlan
|
|
291
290
|
) -> None:
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
#
|
|
295
|
-
#
|
|
296
|
-
#
|
|
297
|
-
#
|
|
291
|
+
# Restating intervals on prod plans means that once the data for the intervals being restated has been backfilled
|
|
292
|
+
# (which happens in the backfill stage) then we need to clear those intervals *from state* across all other environments.
|
|
293
|
+
#
|
|
294
|
+
# This ensures that work done in dev environments can still be promoted to prod by forcing dev environments to
|
|
295
|
+
# re-run intervals that changed in prod (because after this stage runs they are cleared from state and thus show as missing)
|
|
296
|
+
#
|
|
297
|
+
# It also means that any new dev environments created while this restatement plan was running also get the
|
|
298
|
+
# correct intervals cleared because we look up matching snapshots as at right now and not as at the time the plan
|
|
299
|
+
# was created, which could have been several hours ago if there was a lot of data to restate.
|
|
298
300
|
#
|
|
299
301
|
# Without this rule, its possible that promoting a dev table to prod will introduce old data to prod
|
|
300
|
-
snapshot_intervals_to_restate.update(
|
|
301
|
-
self._restatement_intervals_across_all_environments(
|
|
302
|
-
prod_restatements=plan.restatements,
|
|
303
|
-
disable_restatement_models=plan.disabled_restatement_models,
|
|
304
|
-
loaded_snapshots={s.snapshot_id: s for s in stage.all_snapshots.values()},
|
|
305
|
-
)
|
|
306
|
-
)
|
|
307
302
|
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
303
|
+
intervals_to_clear = identify_restatement_intervals_across_snapshot_versions(
|
|
304
|
+
state_reader=self.state_sync,
|
|
305
|
+
prod_restatements=plan.restatements,
|
|
306
|
+
disable_restatement_models=plan.disabled_restatement_models,
|
|
307
|
+
loaded_snapshots={s.snapshot_id: s for s in stage.all_snapshots.values()},
|
|
308
|
+
current_ts=to_timestamp(plan.execution_time or now()),
|
|
311
309
|
)
|
|
312
310
|
|
|
311
|
+
if not intervals_to_clear:
|
|
312
|
+
# Nothing to do
|
|
313
|
+
return
|
|
314
|
+
|
|
315
|
+
# While the restatements were being processed, did any of the snapshots being restated get new versions deployed?
|
|
316
|
+
# If they did, they will not reflect the data that just got restated, so we need to notify the user
|
|
317
|
+
deployed_during_restatement: t.Dict[
|
|
318
|
+
str, t.Tuple[SnapshotTableInfo, SnapshotTableInfo]
|
|
319
|
+
] = {} # tuple of (restated_snapshot, current_prod_snapshot)
|
|
320
|
+
|
|
321
|
+
if deployed_env := self.state_sync.get_environment(plan.environment.name):
|
|
322
|
+
promoted_snapshots_by_name = {s.name: s for s in deployed_env.snapshots}
|
|
323
|
+
|
|
324
|
+
for name in plan.restatements:
|
|
325
|
+
snapshot = stage.all_snapshots[name]
|
|
326
|
+
version = snapshot.table_info.version
|
|
327
|
+
if (
|
|
328
|
+
prod_snapshot := promoted_snapshots_by_name.get(name)
|
|
329
|
+
) and prod_snapshot.version != version:
|
|
330
|
+
deployed_during_restatement[name] = (
|
|
331
|
+
snapshot.table_info,
|
|
332
|
+
prod_snapshot.table_info,
|
|
333
|
+
)
|
|
334
|
+
|
|
335
|
+
# we need to *not* clear the intervals on the snapshots where new versions were deployed while the restatement was running in order to prevent
|
|
336
|
+
# subsequent plans from having unexpected intervals to backfill.
|
|
337
|
+
# we instead list the affected models and abort the plan with an error so the user can decide what to do
|
|
338
|
+
# (either re-attempt the restatement plan or leave things as they are)
|
|
339
|
+
filtered_intervals_to_clear = [
|
|
340
|
+
(s.snapshot, s.interval)
|
|
341
|
+
for s in intervals_to_clear.values()
|
|
342
|
+
if s.snapshot.name not in deployed_during_restatement
|
|
343
|
+
]
|
|
344
|
+
|
|
345
|
+
if filtered_intervals_to_clear:
|
|
346
|
+
# We still clear intervals in other envs for models that were successfully restated without having new versions promoted during restatement
|
|
347
|
+
self.state_sync.remove_intervals(
|
|
348
|
+
snapshot_intervals=filtered_intervals_to_clear,
|
|
349
|
+
remove_shared_versions=plan.is_prod,
|
|
350
|
+
)
|
|
351
|
+
|
|
352
|
+
if deployed_env and deployed_during_restatement:
|
|
353
|
+
self.console.log_models_updated_during_restatement(
|
|
354
|
+
list(deployed_during_restatement.values()),
|
|
355
|
+
plan.environment.naming_info,
|
|
356
|
+
self.default_catalog,
|
|
357
|
+
)
|
|
358
|
+
raise ConflictingPlanError(
|
|
359
|
+
f"Another plan ({deployed_env.summary.plan_id}) deployed new versions of {len(deployed_during_restatement)} models in the target environment '{plan.environment.name}' while they were being restated by this plan.\n"
|
|
360
|
+
"Please re-apply your plan if these new versions should be restated."
|
|
361
|
+
)
|
|
362
|
+
|
|
313
363
|
def visit_environment_record_update_stage(
|
|
314
364
|
self, stage: stages.EnvironmentRecordUpdateStage, plan: EvaluatablePlan
|
|
315
365
|
) -> None:
|
|
@@ -422,97 +472,6 @@ class BuiltInPlanEvaluator(PlanEvaluator):
|
|
|
422
472
|
on_complete=on_complete,
|
|
423
473
|
)
|
|
424
474
|
|
|
425
|
-
def _restatement_intervals_across_all_environments(
|
|
426
|
-
self,
|
|
427
|
-
prod_restatements: t.Dict[str, Interval],
|
|
428
|
-
disable_restatement_models: t.Set[str],
|
|
429
|
-
loaded_snapshots: t.Dict[SnapshotId, Snapshot],
|
|
430
|
-
) -> t.Set[t.Tuple[SnapshotTableInfo, Interval]]:
|
|
431
|
-
"""
|
|
432
|
-
Given a map of snapshot names + intervals to restate in prod:
|
|
433
|
-
- Look up matching snapshots across all environments (match based on name - regardless of version)
|
|
434
|
-
- For each match, also match downstream snapshots while filtering out models that have restatement disabled
|
|
435
|
-
- Return all matches mapped to the intervals of the prod snapshot being restated
|
|
436
|
-
|
|
437
|
-
The goal here is to produce a list of intervals to invalidate across all environments so that a cadence
|
|
438
|
-
run in those environments causes the intervals to be repopulated
|
|
439
|
-
"""
|
|
440
|
-
if not prod_restatements:
|
|
441
|
-
return set()
|
|
442
|
-
|
|
443
|
-
prod_name_versions: t.Set[SnapshotNameVersion] = {
|
|
444
|
-
s.name_version for s in loaded_snapshots.values()
|
|
445
|
-
}
|
|
446
|
-
|
|
447
|
-
snapshots_to_restate: t.Dict[SnapshotId, t.Tuple[SnapshotTableInfo, Interval]] = {}
|
|
448
|
-
|
|
449
|
-
for env_summary in self.state_sync.get_environments_summary():
|
|
450
|
-
# Fetch the full environment object one at a time to avoid loading all environments into memory at once
|
|
451
|
-
env = self.state_sync.get_environment(env_summary.name)
|
|
452
|
-
if not env:
|
|
453
|
-
logger.warning("Environment %s not found", env_summary.name)
|
|
454
|
-
continue
|
|
455
|
-
|
|
456
|
-
keyed_snapshots = {s.name: s.table_info for s in env.snapshots}
|
|
457
|
-
|
|
458
|
-
# We dont just restate matching snapshots, we also have to restate anything downstream of them
|
|
459
|
-
# so that if A gets restated in prod and dev has A <- B <- C, B and C get restated in dev
|
|
460
|
-
env_dag = DAG({s.name: {p.name for p in s.parents} for s in env.snapshots})
|
|
461
|
-
|
|
462
|
-
for restatement, intervals in prod_restatements.items():
|
|
463
|
-
if restatement not in keyed_snapshots:
|
|
464
|
-
continue
|
|
465
|
-
affected_snapshot_names = [
|
|
466
|
-
x
|
|
467
|
-
for x in ([restatement] + env_dag.downstream(restatement))
|
|
468
|
-
if x not in disable_restatement_models
|
|
469
|
-
]
|
|
470
|
-
snapshots_to_restate.update(
|
|
471
|
-
{
|
|
472
|
-
keyed_snapshots[a].snapshot_id: (keyed_snapshots[a], intervals)
|
|
473
|
-
for a in affected_snapshot_names
|
|
474
|
-
# Don't restate a snapshot if it shares the version with a snapshot in prod
|
|
475
|
-
if keyed_snapshots[a].name_version not in prod_name_versions
|
|
476
|
-
}
|
|
477
|
-
)
|
|
478
|
-
|
|
479
|
-
# for any affected full_history_restatement_only snapshots, we need to widen the intervals being restated to
|
|
480
|
-
# include the whole time range for that snapshot. This requires a call to state to load the full snapshot record,
|
|
481
|
-
# so we only do it if necessary
|
|
482
|
-
full_history_restatement_snapshot_ids = [
|
|
483
|
-
# FIXME: full_history_restatement_only is just one indicator that the snapshot can only be fully refreshed, the other one is Model.depends_on_self
|
|
484
|
-
# however, to figure out depends_on_self, we have to render all the model queries which, alongside having to fetch full snapshots from state,
|
|
485
|
-
# is problematic in secure environments that are deliberately isolated from arbitrary user code (since rendering a query may require user macros to be present)
|
|
486
|
-
# So for now, these are not considered
|
|
487
|
-
s_id
|
|
488
|
-
for s_id, s in snapshots_to_restate.items()
|
|
489
|
-
if s[0].full_history_restatement_only
|
|
490
|
-
]
|
|
491
|
-
if full_history_restatement_snapshot_ids:
|
|
492
|
-
# only load full snapshot records that we havent already loaded
|
|
493
|
-
additional_snapshots = self.state_sync.get_snapshots(
|
|
494
|
-
[
|
|
495
|
-
s.snapshot_id
|
|
496
|
-
for s in full_history_restatement_snapshot_ids
|
|
497
|
-
if s.snapshot_id not in loaded_snapshots
|
|
498
|
-
]
|
|
499
|
-
)
|
|
500
|
-
|
|
501
|
-
all_snapshots = loaded_snapshots | additional_snapshots
|
|
502
|
-
|
|
503
|
-
for full_snapshot_id in full_history_restatement_snapshot_ids:
|
|
504
|
-
full_snapshot = all_snapshots[full_snapshot_id]
|
|
505
|
-
_, original_intervals = snapshots_to_restate[full_snapshot_id]
|
|
506
|
-
original_start, original_end = original_intervals
|
|
507
|
-
|
|
508
|
-
# get_removal_interval() widens intervals if necessary
|
|
509
|
-
new_intervals = full_snapshot.get_removal_interval(
|
|
510
|
-
start=original_start, end=original_end
|
|
511
|
-
)
|
|
512
|
-
snapshots_to_restate[full_snapshot_id] = (full_snapshot.table_info, new_intervals)
|
|
513
|
-
|
|
514
|
-
return set(snapshots_to_restate.values())
|
|
515
|
-
|
|
516
475
|
def _update_intervals_for_new_snapshots(self, snapshots: t.Collection[Snapshot]) -> None:
|
|
517
476
|
snapshots_intervals: t.List[SnapshotIntervals] = []
|
|
518
477
|
for snapshot in snapshots:
|
sqlmesh/core/plan/explainer.py
CHANGED
|
@@ -1,6 +1,10 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import abc
|
|
2
4
|
import typing as t
|
|
3
5
|
import logging
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from collections import defaultdict
|
|
4
8
|
|
|
5
9
|
from rich.console import Console as RichConsole
|
|
6
10
|
from rich.tree import Tree
|
|
@@ -8,6 +12,10 @@ from sqlglot.dialects.dialect import DialectType
|
|
|
8
12
|
from sqlmesh.core import constants as c
|
|
9
13
|
from sqlmesh.core.console import Console, TerminalConsole, get_console
|
|
10
14
|
from sqlmesh.core.environment import EnvironmentNamingInfo
|
|
15
|
+
from sqlmesh.core.plan.common import (
|
|
16
|
+
SnapshotIntervalClearRequest,
|
|
17
|
+
identify_restatement_intervals_across_snapshot_versions,
|
|
18
|
+
)
|
|
11
19
|
from sqlmesh.core.plan.definition import EvaluatablePlan, SnapshotIntervals
|
|
12
20
|
from sqlmesh.core.plan import stages
|
|
13
21
|
from sqlmesh.core.plan.evaluator import (
|
|
@@ -16,6 +24,8 @@ from sqlmesh.core.plan.evaluator import (
|
|
|
16
24
|
from sqlmesh.core.state_sync import StateReader
|
|
17
25
|
from sqlmesh.core.snapshot.definition import (
|
|
18
26
|
SnapshotInfoMixin,
|
|
27
|
+
SnapshotIdAndVersion,
|
|
28
|
+
model_display_name,
|
|
19
29
|
)
|
|
20
30
|
from sqlmesh.utils import Verbosity, rich as srich, to_snake_case
|
|
21
31
|
from sqlmesh.utils.date import to_ts
|
|
@@ -45,6 +55,15 @@ class PlanExplainer(PlanEvaluator):
|
|
|
45
55
|
explainer_console = _get_explainer_console(
|
|
46
56
|
self.console, plan.environment, self.default_catalog
|
|
47
57
|
)
|
|
58
|
+
|
|
59
|
+
# add extra metadata that's only needed at this point for better --explain output
|
|
60
|
+
plan_stages = [
|
|
61
|
+
ExplainableRestatementStage.from_restatement_stage(stage, self.state_reader, plan)
|
|
62
|
+
if isinstance(stage, stages.RestatementStage)
|
|
63
|
+
else stage
|
|
64
|
+
for stage in plan_stages
|
|
65
|
+
]
|
|
66
|
+
|
|
48
67
|
explainer_console.explain(plan_stages)
|
|
49
68
|
|
|
50
69
|
|
|
@@ -54,6 +73,41 @@ class ExplainerConsole(abc.ABC):
|
|
|
54
73
|
pass
|
|
55
74
|
|
|
56
75
|
|
|
76
|
+
@dataclass
|
|
77
|
+
class ExplainableRestatementStage(stages.RestatementStage):
|
|
78
|
+
"""
|
|
79
|
+
This brings forward some calculations that would usually be done in the evaluator so the user can be given a better indication
|
|
80
|
+
of what might happen when they ask for the plan to be explained
|
|
81
|
+
"""
|
|
82
|
+
|
|
83
|
+
snapshot_intervals_to_clear: t.Dict[str, t.List[SnapshotIntervalClearRequest]]
|
|
84
|
+
"""Which snapshots from other environments would have intervals cleared as part of restatement, grouped by name."""
|
|
85
|
+
|
|
86
|
+
@classmethod
|
|
87
|
+
def from_restatement_stage(
|
|
88
|
+
cls: t.Type[ExplainableRestatementStage],
|
|
89
|
+
stage: stages.RestatementStage,
|
|
90
|
+
state_reader: StateReader,
|
|
91
|
+
plan: EvaluatablePlan,
|
|
92
|
+
) -> ExplainableRestatementStage:
|
|
93
|
+
all_restatement_intervals = identify_restatement_intervals_across_snapshot_versions(
|
|
94
|
+
state_reader=state_reader,
|
|
95
|
+
prod_restatements=plan.restatements,
|
|
96
|
+
disable_restatement_models=plan.disabled_restatement_models,
|
|
97
|
+
loaded_snapshots={s.snapshot_id: s for s in stage.all_snapshots.values()},
|
|
98
|
+
)
|
|
99
|
+
|
|
100
|
+
# Group the interval clear requests by snapshot name to make them easier to write to the console
|
|
101
|
+
snapshot_intervals_to_clear = defaultdict(list)
|
|
102
|
+
for clear_request in all_restatement_intervals.values():
|
|
103
|
+
snapshot_intervals_to_clear[clear_request.snapshot.name].append(clear_request)
|
|
104
|
+
|
|
105
|
+
return cls(
|
|
106
|
+
snapshot_intervals_to_clear=snapshot_intervals_to_clear,
|
|
107
|
+
all_snapshots=stage.all_snapshots,
|
|
108
|
+
)
|
|
109
|
+
|
|
110
|
+
|
|
57
111
|
MAX_TREE_LENGTH = 10
|
|
58
112
|
|
|
59
113
|
|
|
@@ -146,11 +200,37 @@ class RichExplainerConsole(ExplainerConsole):
|
|
|
146
200
|
tree.add(display_name)
|
|
147
201
|
return tree
|
|
148
202
|
|
|
149
|
-
def
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
203
|
+
def visit_explainable_restatement_stage(self, stage: ExplainableRestatementStage) -> Tree:
|
|
204
|
+
return self.visit_restatement_stage(stage)
|
|
205
|
+
|
|
206
|
+
def visit_restatement_stage(
|
|
207
|
+
self, stage: t.Union[ExplainableRestatementStage, stages.RestatementStage]
|
|
208
|
+
) -> Tree:
|
|
209
|
+
tree = Tree(
|
|
210
|
+
"[bold]Invalidate data intervals in state for development environments to prevent old data from being promoted[/bold]\n"
|
|
211
|
+
"This only affects state and will not clear physical data from the tables until the next plan for each environment"
|
|
212
|
+
)
|
|
213
|
+
|
|
214
|
+
if isinstance(stage, ExplainableRestatementStage) and (
|
|
215
|
+
snapshot_intervals := stage.snapshot_intervals_to_clear
|
|
216
|
+
):
|
|
217
|
+
for name, clear_requests in snapshot_intervals.items():
|
|
218
|
+
display_name = model_display_name(
|
|
219
|
+
name, self.environment_naming_info, self.default_catalog, self.dialect
|
|
220
|
+
)
|
|
221
|
+
interval_start = min(cr.interval[0] for cr in clear_requests)
|
|
222
|
+
interval_end = max(cr.interval[1] for cr in clear_requests)
|
|
223
|
+
|
|
224
|
+
if not interval_start or not interval_end:
|
|
225
|
+
continue
|
|
226
|
+
|
|
227
|
+
node = tree.add(f"{display_name} [{to_ts(interval_start)} - {to_ts(interval_end)}]")
|
|
228
|
+
|
|
229
|
+
all_environment_names = sorted(
|
|
230
|
+
set(env_name for cr in clear_requests for env_name in cr.environment_names)
|
|
231
|
+
)
|
|
232
|
+
node.add("in environments: " + ", ".join(all_environment_names))
|
|
233
|
+
|
|
154
234
|
return tree
|
|
155
235
|
|
|
156
236
|
def visit_backfill_stage(self, stage: stages.BackfillStage) -> Tree:
|
|
@@ -265,12 +345,14 @@ class RichExplainerConsole(ExplainerConsole):
|
|
|
265
345
|
|
|
266
346
|
def _display_name(
|
|
267
347
|
self,
|
|
268
|
-
snapshot: SnapshotInfoMixin,
|
|
348
|
+
snapshot: t.Union[SnapshotInfoMixin, SnapshotIdAndVersion],
|
|
269
349
|
environment_naming_info: t.Optional[EnvironmentNamingInfo] = None,
|
|
270
350
|
) -> str:
|
|
271
351
|
return snapshot.display_name(
|
|
272
|
-
environment_naming_info or self.environment_naming_info,
|
|
273
|
-
self.default_catalog
|
|
352
|
+
environment_naming_info=environment_naming_info or self.environment_naming_info,
|
|
353
|
+
default_catalog=self.default_catalog
|
|
354
|
+
if self.verbosity < Verbosity.VERY_VERBOSE
|
|
355
|
+
else None,
|
|
274
356
|
dialect=self.dialect,
|
|
275
357
|
)
|
|
276
358
|
|