sqlmesh 0.213.1.dev1__py3-none-any.whl → 0.227.2.dev4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlmesh/__init__.py +12 -2
- sqlmesh/_version.py +2 -2
- sqlmesh/cli/main.py +0 -44
- sqlmesh/cli/project_init.py +11 -2
- sqlmesh/core/_typing.py +1 -0
- sqlmesh/core/audit/definition.py +8 -2
- sqlmesh/core/config/__init__.py +1 -1
- sqlmesh/core/config/connection.py +17 -5
- sqlmesh/core/config/dbt.py +13 -0
- sqlmesh/core/config/janitor.py +12 -0
- sqlmesh/core/config/loader.py +7 -0
- sqlmesh/core/config/model.py +2 -0
- sqlmesh/core/config/root.py +3 -0
- sqlmesh/core/console.py +81 -3
- sqlmesh/core/constants.py +1 -1
- sqlmesh/core/context.py +69 -26
- sqlmesh/core/dialect.py +3 -0
- sqlmesh/core/engine_adapter/_typing.py +2 -0
- sqlmesh/core/engine_adapter/base.py +322 -22
- sqlmesh/core/engine_adapter/base_postgres.py +17 -1
- sqlmesh/core/engine_adapter/bigquery.py +146 -7
- sqlmesh/core/engine_adapter/clickhouse.py +17 -13
- sqlmesh/core/engine_adapter/databricks.py +33 -2
- sqlmesh/core/engine_adapter/fabric.py +10 -29
- sqlmesh/core/engine_adapter/mixins.py +142 -48
- sqlmesh/core/engine_adapter/mssql.py +15 -4
- sqlmesh/core/engine_adapter/mysql.py +2 -2
- sqlmesh/core/engine_adapter/postgres.py +9 -3
- sqlmesh/core/engine_adapter/redshift.py +4 -0
- sqlmesh/core/engine_adapter/risingwave.py +1 -0
- sqlmesh/core/engine_adapter/shared.py +6 -0
- sqlmesh/core/engine_adapter/snowflake.py +82 -11
- sqlmesh/core/engine_adapter/spark.py +14 -10
- sqlmesh/core/engine_adapter/trino.py +4 -2
- sqlmesh/core/environment.py +2 -0
- sqlmesh/core/janitor.py +181 -0
- sqlmesh/core/lineage.py +1 -0
- sqlmesh/core/linter/definition.py +13 -13
- sqlmesh/core/linter/rules/builtin.py +29 -0
- sqlmesh/core/macros.py +35 -13
- sqlmesh/core/model/common.py +2 -0
- sqlmesh/core/model/definition.py +82 -28
- sqlmesh/core/model/kind.py +66 -2
- sqlmesh/core/model/meta.py +108 -4
- sqlmesh/core/node.py +101 -1
- sqlmesh/core/plan/builder.py +18 -10
- sqlmesh/core/plan/common.py +199 -2
- sqlmesh/core/plan/definition.py +25 -6
- sqlmesh/core/plan/evaluator.py +75 -113
- sqlmesh/core/plan/explainer.py +90 -8
- sqlmesh/core/plan/stages.py +42 -21
- sqlmesh/core/renderer.py +78 -32
- sqlmesh/core/scheduler.py +102 -22
- sqlmesh/core/selector.py +137 -9
- sqlmesh/core/signal.py +64 -1
- sqlmesh/core/snapshot/__init__.py +2 -0
- sqlmesh/core/snapshot/definition.py +146 -34
- sqlmesh/core/snapshot/evaluator.py +689 -124
- sqlmesh/core/state_sync/__init__.py +0 -1
- sqlmesh/core/state_sync/base.py +55 -33
- sqlmesh/core/state_sync/cache.py +12 -7
- sqlmesh/core/state_sync/common.py +216 -111
- sqlmesh/core/state_sync/db/environment.py +6 -4
- sqlmesh/core/state_sync/db/facade.py +42 -24
- sqlmesh/core/state_sync/db/interval.py +27 -7
- sqlmesh/core/state_sync/db/migrator.py +34 -16
- sqlmesh/core/state_sync/db/snapshot.py +177 -169
- sqlmesh/core/table_diff.py +2 -2
- sqlmesh/core/test/context.py +2 -0
- sqlmesh/core/test/definition.py +14 -9
- sqlmesh/dbt/adapter.py +22 -16
- sqlmesh/dbt/basemodel.py +75 -56
- sqlmesh/dbt/builtin.py +116 -12
- sqlmesh/dbt/column.py +17 -5
- sqlmesh/dbt/common.py +19 -5
- sqlmesh/dbt/context.py +14 -1
- sqlmesh/dbt/loader.py +61 -9
- sqlmesh/dbt/manifest.py +174 -16
- sqlmesh/dbt/model.py +183 -85
- sqlmesh/dbt/package.py +16 -1
- sqlmesh/dbt/profile.py +3 -3
- sqlmesh/dbt/project.py +12 -7
- sqlmesh/dbt/seed.py +6 -1
- sqlmesh/dbt/source.py +13 -1
- sqlmesh/dbt/target.py +25 -6
- sqlmesh/dbt/test.py +36 -5
- sqlmesh/migrations/v0000_baseline.py +95 -0
- sqlmesh/migrations/v0061_mysql_fix_blob_text_type.py +5 -7
- sqlmesh/migrations/v0062_add_model_gateway.py +5 -1
- sqlmesh/migrations/v0063_change_signals.py +5 -3
- sqlmesh/migrations/v0064_join_when_matched_strings.py +5 -3
- sqlmesh/migrations/v0065_add_model_optimize.py +5 -1
- sqlmesh/migrations/v0066_add_auto_restatements.py +8 -3
- sqlmesh/migrations/v0067_add_tsql_date_full_precision.py +5 -1
- sqlmesh/migrations/v0068_include_unrendered_query_in_metadata_hash.py +5 -1
- sqlmesh/migrations/v0069_update_dev_table_suffix.py +5 -3
- sqlmesh/migrations/v0070_include_grains_in_metadata_hash.py +5 -1
- sqlmesh/migrations/v0071_add_dev_version_to_intervals.py +9 -5
- sqlmesh/migrations/v0072_add_environment_statements.py +5 -3
- sqlmesh/migrations/v0073_remove_symbolic_disable_restatement.py +5 -3
- sqlmesh/migrations/v0074_add_partition_by_time_column_property.py +5 -1
- sqlmesh/migrations/v0075_remove_validate_query.py +5 -3
- sqlmesh/migrations/v0076_add_cron_tz.py +5 -1
- sqlmesh/migrations/v0077_fix_column_type_hash_calculation.py +5 -1
- sqlmesh/migrations/v0078_warn_if_non_migratable_python_env.py +5 -3
- sqlmesh/migrations/v0079_add_gateway_managed_property.py +10 -5
- sqlmesh/migrations/v0080_add_batch_size_to_scd_type_2_models.py +5 -1
- sqlmesh/migrations/v0081_update_partitioned_by.py +5 -3
- sqlmesh/migrations/v0082_warn_if_incorrectly_duplicated_statements.py +5 -3
- sqlmesh/migrations/v0083_use_sql_for_scd_time_data_type_data_hash.py +5 -1
- sqlmesh/migrations/v0084_normalize_quote_when_matched_and_merge_filter.py +5 -1
- sqlmesh/migrations/v0085_deterministic_repr.py +5 -3
- sqlmesh/migrations/v0086_check_deterministic_bug.py +5 -3
- sqlmesh/migrations/v0087_normalize_blueprint_variables.py +5 -3
- sqlmesh/migrations/v0088_warn_about_variable_python_env_diffs.py +5 -3
- sqlmesh/migrations/v0089_add_virtual_environment_mode.py +5 -1
- sqlmesh/migrations/v0090_add_forward_only_column.py +9 -5
- sqlmesh/migrations/v0091_on_additive_change.py +5 -1
- sqlmesh/migrations/v0092_warn_about_dbt_data_type_diff.py +5 -3
- sqlmesh/migrations/v0093_use_raw_sql_in_fingerprint.py +5 -1
- sqlmesh/migrations/v0094_add_dev_version_and_fingerprint_columns.py +123 -0
- sqlmesh/migrations/v0095_warn_about_dbt_raw_sql_diff.py +49 -0
- sqlmesh/migrations/v0096_remove_plan_dags_table.py +13 -0
- sqlmesh/migrations/v0097_add_dbt_name_in_node.py +9 -0
- sqlmesh/migrations/{v0060_move_audits_to_model.py → v0098_add_dbt_node_info_in_node.py} +33 -16
- sqlmesh/migrations/v0099_add_last_altered_to_intervals.py +25 -0
- sqlmesh/migrations/v0100_add_grants_and_grants_target_layer.py +9 -0
- sqlmesh/utils/__init__.py +8 -1
- sqlmesh/utils/cache.py +5 -1
- sqlmesh/utils/connection_pool.py +2 -1
- sqlmesh/utils/dag.py +65 -10
- sqlmesh/utils/date.py +8 -1
- sqlmesh/utils/errors.py +8 -0
- sqlmesh/utils/jinja.py +54 -4
- sqlmesh/utils/pydantic.py +6 -6
- sqlmesh/utils/windows.py +13 -3
- {sqlmesh-0.213.1.dev1.dist-info → sqlmesh-0.227.2.dev4.dist-info}/METADATA +7 -10
- sqlmesh-0.227.2.dev4.dist-info/RECORD +370 -0
- sqlmesh_dbt/cli.py +70 -7
- sqlmesh_dbt/console.py +14 -6
- sqlmesh_dbt/operations.py +103 -24
- sqlmesh_dbt/selectors.py +39 -1
- web/client/dist/assets/{Audits-Ucsx1GzF.js → Audits-CBiYyyx-.js} +1 -1
- web/client/dist/assets/{Banner-BWDzvavM.js → Banner-DSRbUlO5.js} +1 -1
- web/client/dist/assets/{ChevronDownIcon-D2VL13Ah.js → ChevronDownIcon-MK_nrjD_.js} +1 -1
- web/client/dist/assets/{ChevronRightIcon-DWGYbf1l.js → ChevronRightIcon-CLWtT22Q.js} +1 -1
- web/client/dist/assets/{Content-DdHDZM3I.js → Content-BNuGZN5l.js} +1 -1
- web/client/dist/assets/{Content-Bikfy8fh.js → Content-CSHJyW0n.js} +1 -1
- web/client/dist/assets/{Data-CzAJH7rW.js → Data-C1oRDbLx.js} +1 -1
- web/client/dist/assets/{DataCatalog-BJF11g8f.js → DataCatalog-HXyX2-_j.js} +1 -1
- web/client/dist/assets/{Editor-s0SBpV2y.js → Editor-BDyfpUuw.js} +1 -1
- web/client/dist/assets/{Editor-DgLhgKnm.js → Editor-D0jNItwC.js} +1 -1
- web/client/dist/assets/{Errors-D0m0O1d3.js → Errors-BfuFLcPi.js} +1 -1
- web/client/dist/assets/{FileExplorer-CEv0vXkt.js → FileExplorer-BR9IE3he.js} +1 -1
- web/client/dist/assets/{Footer-BwzXn8Ew.js → Footer-CgBEtiAh.js} +1 -1
- web/client/dist/assets/{Header-6heDkEqG.js → Header-DSqR6nSO.js} +1 -1
- web/client/dist/assets/{Input-obuJsD6k.js → Input-B-oZ6fGO.js} +1 -1
- web/client/dist/assets/Lineage-DYQVwDbD.js +1 -0
- web/client/dist/assets/{ListboxShow-HM9_qyrt.js → ListboxShow-BE5-xevs.js} +1 -1
- web/client/dist/assets/{ModelLineage-zWdKo0U2.js → ModelLineage-DkIFAYo4.js} +1 -1
- web/client/dist/assets/{Models-Bcu66SRz.js → Models-D5dWr8RB.js} +1 -1
- web/client/dist/assets/{Page-BWEEQfIt.js → Page-C-XfU5BR.js} +1 -1
- web/client/dist/assets/{Plan-C4gXCqlf.js → Plan-ZEuTINBq.js} +1 -1
- web/client/dist/assets/{PlusCircleIcon-CVDO651q.js → PlusCircleIcon-DVXAHG8_.js} +1 -1
- web/client/dist/assets/{ReportErrors-BT6xFwAr.js → ReportErrors-B7FEPzMB.js} +1 -1
- web/client/dist/assets/{Root-ryJoBK4h.js → Root-8aZyhPxF.js} +1 -1
- web/client/dist/assets/{SearchList-DB04sPb9.js → SearchList-W_iT2G82.js} +1 -1
- web/client/dist/assets/{SelectEnvironment-CUYcXUu6.js → SelectEnvironment-C65jALmO.js} +1 -1
- web/client/dist/assets/{SourceList-Doo_9ZGp.js → SourceList-DSLO6nVJ.js} +1 -1
- web/client/dist/assets/{SourceListItem-D5Mj7Dly.js → SourceListItem-BHt8d9-I.js} +1 -1
- web/client/dist/assets/{SplitPane-qHmkD1qy.js → SplitPane-CViaZmw6.js} +1 -1
- web/client/dist/assets/{Tests-DH1Z74ML.js → Tests-DhaVt5t1.js} +1 -1
- web/client/dist/assets/{Welcome-DqUJUNMF.js → Welcome-DvpjH-_4.js} +1 -1
- web/client/dist/assets/context-BctCsyGb.js +71 -0
- web/client/dist/assets/{context-Dr54UHLi.js → context-DFNeGsFF.js} +1 -1
- web/client/dist/assets/{editor-DYIP1yQ4.js → editor-CcO28cqd.js} +1 -1
- web/client/dist/assets/{file-DarlIDVi.js → file-CvJN3aZO.js} +1 -1
- web/client/dist/assets/{floating-ui.react-dom-BH3TFvkM.js → floating-ui.react-dom-CjE-JNW1.js} +1 -1
- web/client/dist/assets/{help-Bl8wqaQc.js → help-DuPhjipa.js} +1 -1
- web/client/dist/assets/{index-D1sR7wpN.js → index-C-dJH7yZ.js} +1 -1
- web/client/dist/assets/{index-O3mjYpnE.js → index-Dj0i1-CA.js} +2 -2
- web/client/dist/assets/{plan-CehRrJUG.js → plan-BTRSbjKn.js} +1 -1
- web/client/dist/assets/{popover-CqgMRE0G.js → popover-_Sf0yvOI.js} +1 -1
- web/client/dist/assets/{project-6gxepOhm.js → project-BvSOI8MY.js} +1 -1
- web/client/dist/index.html +1 -1
- sqlmesh/integrations/llm.py +0 -56
- sqlmesh/migrations/v0001_init.py +0 -60
- sqlmesh/migrations/v0002_remove_identify.py +0 -5
- sqlmesh/migrations/v0003_move_batch_size.py +0 -34
- sqlmesh/migrations/v0004_environmnent_add_finalized_at.py +0 -23
- sqlmesh/migrations/v0005_create_seed_table.py +0 -24
- sqlmesh/migrations/v0006_change_seed_hash.py +0 -5
- sqlmesh/migrations/v0007_env_table_info_to_kind.py +0 -99
- sqlmesh/migrations/v0008_create_intervals_table.py +0 -38
- sqlmesh/migrations/v0009_remove_pre_post_hooks.py +0 -62
- sqlmesh/migrations/v0010_seed_hash_batch_size.py +0 -5
- sqlmesh/migrations/v0011_add_model_kind_name.py +0 -63
- sqlmesh/migrations/v0012_update_jinja_expressions.py +0 -86
- sqlmesh/migrations/v0013_serde_using_model_dialects.py +0 -87
- sqlmesh/migrations/v0014_fix_dev_intervals.py +0 -14
- sqlmesh/migrations/v0015_environment_add_promoted_snapshot_ids.py +0 -26
- sqlmesh/migrations/v0016_fix_windows_path.py +0 -59
- sqlmesh/migrations/v0017_fix_windows_seed_path.py +0 -55
- sqlmesh/migrations/v0018_rename_snapshot_model_to_node.py +0 -53
- sqlmesh/migrations/v0019_add_env_suffix_target.py +0 -28
- sqlmesh/migrations/v0020_remove_redundant_attributes_from_dbt_models.py +0 -80
- sqlmesh/migrations/v0021_fix_table_properties.py +0 -62
- sqlmesh/migrations/v0022_move_project_to_model.py +0 -54
- sqlmesh/migrations/v0023_fix_added_models_with_forward_only_parents.py +0 -65
- sqlmesh/migrations/v0024_replace_model_kind_name_enum_with_value.py +0 -55
- sqlmesh/migrations/v0025_fix_intervals_and_missing_change_category.py +0 -117
- sqlmesh/migrations/v0026_remove_dialect_from_seed.py +0 -55
- sqlmesh/migrations/v0027_minute_interval_to_five.py +0 -57
- sqlmesh/migrations/v0028_add_plan_dags_table.py +0 -29
- sqlmesh/migrations/v0029_generate_schema_types_using_dialect.py +0 -69
- sqlmesh/migrations/v0030_update_unrestorable_snapshots.py +0 -65
- sqlmesh/migrations/v0031_remove_dbt_target_fields.py +0 -65
- sqlmesh/migrations/v0032_add_sqlmesh_version.py +0 -25
- sqlmesh/migrations/v0033_mysql_fix_blob_text_type.py +0 -45
- sqlmesh/migrations/v0034_add_default_catalog.py +0 -367
- sqlmesh/migrations/v0035_add_catalog_name_override.py +0 -22
- sqlmesh/migrations/v0036_delete_plan_dags_bug_fix.py +0 -14
- sqlmesh/migrations/v0037_remove_dbt_is_incremental_macro.py +0 -61
- sqlmesh/migrations/v0038_add_expiration_ts_to_snapshot.py +0 -73
- sqlmesh/migrations/v0039_include_environment_in_plan_dag_spec.py +0 -68
- sqlmesh/migrations/v0040_add_previous_finalized_snapshots.py +0 -26
- sqlmesh/migrations/v0041_remove_hash_raw_query_attribute.py +0 -59
- sqlmesh/migrations/v0042_trim_indirect_versions.py +0 -66
- sqlmesh/migrations/v0043_fix_remove_obsolete_attributes_in_plan_dags.py +0 -61
- sqlmesh/migrations/v0044_quote_identifiers_in_model_attributes.py +0 -5
- sqlmesh/migrations/v0045_move_gateway_variable.py +0 -70
- sqlmesh/migrations/v0046_add_batch_concurrency.py +0 -8
- sqlmesh/migrations/v0047_change_scd_string_to_column.py +0 -5
- sqlmesh/migrations/v0048_drop_indirect_versions.py +0 -59
- sqlmesh/migrations/v0049_replace_identifier_with_version_in_seeds_table.py +0 -57
- sqlmesh/migrations/v0050_drop_seeds_table.py +0 -11
- sqlmesh/migrations/v0051_rename_column_descriptions.py +0 -65
- sqlmesh/migrations/v0052_add_normalize_name_in_environment_naming_info.py +0 -28
- sqlmesh/migrations/v0053_custom_model_kind_extra_attributes.py +0 -5
- sqlmesh/migrations/v0054_fix_trailing_comments.py +0 -5
- sqlmesh/migrations/v0055_add_updated_ts_unpaused_ts_ttl_ms_unrestorable_to_snapshot.py +0 -132
- sqlmesh/migrations/v0056_restore_table_indexes.py +0 -118
- sqlmesh/migrations/v0057_add_table_format.py +0 -5
- sqlmesh/migrations/v0058_add_requirements.py +0 -26
- sqlmesh/migrations/v0059_add_physical_version.py +0 -5
- sqlmesh-0.213.1.dev1.dist-info/RECORD +0 -421
- web/client/dist/assets/Lineage-D0Hgdz2v.js +0 -1
- web/client/dist/assets/context-DgX0fp2E.js +0 -68
- {sqlmesh-0.213.1.dev1.dist-info → sqlmesh-0.227.2.dev4.dist-info}/WHEEL +0 -0
- {sqlmesh-0.213.1.dev1.dist-info → sqlmesh-0.227.2.dev4.dist-info}/entry_points.txt +0 -0
- {sqlmesh-0.213.1.dev1.dist-info → sqlmesh-0.227.2.dev4.dist-info}/licenses/LICENSE +0 -0
- {sqlmesh-0.213.1.dev1.dist-info → sqlmesh-0.227.2.dev4.dist-info}/top_level.txt +0 -0
sqlmesh/core/model/meta.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import typing as t
|
|
4
|
+
from enum import Enum
|
|
4
5
|
from functools import cached_property
|
|
5
6
|
from typing_extensions import Self
|
|
6
7
|
|
|
@@ -13,6 +14,7 @@ from sqlmesh.core import dialect as d
|
|
|
13
14
|
from sqlmesh.core.config.common import VirtualEnvironmentMode
|
|
14
15
|
from sqlmesh.core.config.linter import LinterConfig
|
|
15
16
|
from sqlmesh.core.dialect import normalize_model_name
|
|
17
|
+
from sqlmesh.utils import classproperty
|
|
16
18
|
from sqlmesh.core.model.common import (
|
|
17
19
|
bool_validator,
|
|
18
20
|
default_catalog_validator,
|
|
@@ -29,7 +31,6 @@ from sqlmesh.core.model.kind import (
|
|
|
29
31
|
SCDType2ByTimeKind,
|
|
30
32
|
TimeColumn,
|
|
31
33
|
ViewKind,
|
|
32
|
-
_IncrementalBy,
|
|
33
34
|
model_kind_validator,
|
|
34
35
|
OnAdditiveChange,
|
|
35
36
|
)
|
|
@@ -47,10 +48,41 @@ from sqlmesh.utils.pydantic import (
|
|
|
47
48
|
|
|
48
49
|
if t.TYPE_CHECKING:
|
|
49
50
|
from sqlmesh.core._typing import CustomMaterializationProperties, SessionProperties
|
|
51
|
+
from sqlmesh.core.engine_adapter._typing import GrantsConfig
|
|
50
52
|
|
|
51
53
|
FunctionCall = t.Tuple[str, t.Dict[str, exp.Expression]]
|
|
52
54
|
|
|
53
55
|
|
|
56
|
+
class GrantsTargetLayer(str, Enum):
|
|
57
|
+
"""Target layer(s) where grants should be applied."""
|
|
58
|
+
|
|
59
|
+
ALL = "all"
|
|
60
|
+
PHYSICAL = "physical"
|
|
61
|
+
VIRTUAL = "virtual"
|
|
62
|
+
|
|
63
|
+
@classproperty
|
|
64
|
+
def default(cls) -> "GrantsTargetLayer":
|
|
65
|
+
return GrantsTargetLayer.VIRTUAL
|
|
66
|
+
|
|
67
|
+
@property
|
|
68
|
+
def is_all(self) -> bool:
|
|
69
|
+
return self == GrantsTargetLayer.ALL
|
|
70
|
+
|
|
71
|
+
@property
|
|
72
|
+
def is_physical(self) -> bool:
|
|
73
|
+
return self == GrantsTargetLayer.PHYSICAL
|
|
74
|
+
|
|
75
|
+
@property
|
|
76
|
+
def is_virtual(self) -> bool:
|
|
77
|
+
return self == GrantsTargetLayer.VIRTUAL
|
|
78
|
+
|
|
79
|
+
def __str__(self) -> str:
|
|
80
|
+
return self.name
|
|
81
|
+
|
|
82
|
+
def __repr__(self) -> str:
|
|
83
|
+
return str(self)
|
|
84
|
+
|
|
85
|
+
|
|
54
86
|
class ModelMeta(_Node):
|
|
55
87
|
"""Metadata for models which can be defined in SQL."""
|
|
56
88
|
|
|
@@ -86,6 +118,8 @@ class ModelMeta(_Node):
|
|
|
86
118
|
)
|
|
87
119
|
formatting: t.Optional[bool] = Field(default=None, exclude=True)
|
|
88
120
|
virtual_environment_mode: VirtualEnvironmentMode = VirtualEnvironmentMode.default
|
|
121
|
+
grants_: t.Optional[exp.Tuple] = Field(default=None, alias="grants")
|
|
122
|
+
grants_target_layer: GrantsTargetLayer = GrantsTargetLayer.default
|
|
89
123
|
|
|
90
124
|
_bool_validator = bool_validator
|
|
91
125
|
_model_kind_validator = model_kind_validator
|
|
@@ -248,11 +282,15 @@ class ModelMeta(_Node):
|
|
|
248
282
|
|
|
249
283
|
columns_to_types = info.data.get("columns_to_types_")
|
|
250
284
|
if columns_to_types:
|
|
251
|
-
|
|
285
|
+
from sqlmesh.core.console import get_console
|
|
286
|
+
|
|
287
|
+
console = get_console()
|
|
288
|
+
for column_name in list(col_descriptions):
|
|
252
289
|
if column_name not in columns_to_types:
|
|
253
|
-
|
|
290
|
+
console.log_warning(
|
|
254
291
|
f"In model '{info.data['name']}', a description is provided for column '{column_name}' but it is not a column in the model."
|
|
255
292
|
)
|
|
293
|
+
del col_descriptions[column_name]
|
|
256
294
|
|
|
257
295
|
return col_descriptions
|
|
258
296
|
|
|
@@ -284,6 +322,14 @@ class ModelMeta(_Node):
|
|
|
284
322
|
def ignored_rules_validator(cls, vs: t.Any) -> t.Any:
|
|
285
323
|
return LinterConfig._validate_rules(vs)
|
|
286
324
|
|
|
325
|
+
@field_validator("grants_target_layer", mode="before")
|
|
326
|
+
def _grants_target_layer_validator(cls, v: t.Any) -> t.Any:
|
|
327
|
+
if isinstance(v, exp.Identifier):
|
|
328
|
+
return v.this
|
|
329
|
+
if isinstance(v, exp.Literal) and v.is_string:
|
|
330
|
+
return v.this
|
|
331
|
+
return v
|
|
332
|
+
|
|
287
333
|
@field_validator("session_properties_", mode="before")
|
|
288
334
|
def session_properties_validator(cls, v: t.Any, info: ValidationInfo) -> t.Any:
|
|
289
335
|
# use the generic properties validator to parse the session properties
|
|
@@ -391,6 +437,10 @@ class ModelMeta(_Node):
|
|
|
391
437
|
f"Model {self.name} has `storage_format` set to a table format '{storage_format}' which is deprecated. Please use the `table_format` property instead."
|
|
392
438
|
)
|
|
393
439
|
|
|
440
|
+
# Validate grants configuration for model kind support
|
|
441
|
+
if self.grants is not None and not kind.supports_grants:
|
|
442
|
+
raise ValueError(f"grants cannot be set for {kind.name} models")
|
|
443
|
+
|
|
394
444
|
return self
|
|
395
445
|
|
|
396
446
|
@property
|
|
@@ -414,7 +464,7 @@ class ModelMeta(_Node):
|
|
|
414
464
|
@property
|
|
415
465
|
def lookback(self) -> int:
|
|
416
466
|
"""The incremental lookback window."""
|
|
417
|
-
return (self.kind
|
|
467
|
+
return getattr(self.kind, "lookback", 0) or 0
|
|
418
468
|
|
|
419
469
|
def lookback_start(self, start: TimeLike) -> TimeLike:
|
|
420
470
|
if self.lookback == 0:
|
|
@@ -462,6 +512,30 @@ class ModelMeta(_Node):
|
|
|
462
512
|
return self.kind.materialization_properties
|
|
463
513
|
return {}
|
|
464
514
|
|
|
515
|
+
@cached_property
|
|
516
|
+
def grants(self) -> t.Optional[GrantsConfig]:
|
|
517
|
+
"""A dictionary of grants mapping permission names to lists of grantees."""
|
|
518
|
+
|
|
519
|
+
if self.grants_ is None:
|
|
520
|
+
return None
|
|
521
|
+
|
|
522
|
+
if not self.grants_.expressions:
|
|
523
|
+
return {}
|
|
524
|
+
|
|
525
|
+
grants_dict = {}
|
|
526
|
+
for eq_expr in self.grants_.expressions:
|
|
527
|
+
try:
|
|
528
|
+
permission_name = self._validate_config_expression(eq_expr.left)
|
|
529
|
+
grantee_list = self._validate_nested_config_values(eq_expr.expression)
|
|
530
|
+
grants_dict[permission_name] = grantee_list
|
|
531
|
+
except ConfigError as e:
|
|
532
|
+
permission_name = (
|
|
533
|
+
eq_expr.left.name if hasattr(eq_expr.left, "name") else str(eq_expr.left)
|
|
534
|
+
)
|
|
535
|
+
raise ConfigError(f"Invalid grants configuration for '{permission_name}': {e}")
|
|
536
|
+
|
|
537
|
+
return grants_dict if grants_dict else None
|
|
538
|
+
|
|
465
539
|
@property
|
|
466
540
|
def all_references(self) -> t.List[Reference]:
|
|
467
541
|
"""All references including grains."""
|
|
@@ -526,3 +600,33 @@ class ModelMeta(_Node):
|
|
|
526
600
|
@property
|
|
527
601
|
def ignored_rules(self) -> t.Set[str]:
|
|
528
602
|
return self.ignored_rules_ or set()
|
|
603
|
+
|
|
604
|
+
def _validate_config_expression(self, expr: exp.Expression) -> str:
|
|
605
|
+
if isinstance(expr, (d.MacroFunc, d.MacroVar)):
|
|
606
|
+
raise ConfigError(f"Unresolved macro: {expr.sql(dialect=self.dialect)}")
|
|
607
|
+
|
|
608
|
+
if isinstance(expr, exp.Null):
|
|
609
|
+
raise ConfigError("NULL value")
|
|
610
|
+
|
|
611
|
+
if isinstance(expr, exp.Literal):
|
|
612
|
+
return str(expr.this).strip()
|
|
613
|
+
if isinstance(expr, (exp.Column, exp.Identifier)):
|
|
614
|
+
return expr.name
|
|
615
|
+
return expr.sql(dialect=self.dialect).strip()
|
|
616
|
+
|
|
617
|
+
def _validate_nested_config_values(self, value_expr: exp.Expression) -> t.List[str]:
|
|
618
|
+
result = []
|
|
619
|
+
|
|
620
|
+
def flatten_expr(expr: exp.Expression) -> None:
|
|
621
|
+
if isinstance(expr, exp.Array):
|
|
622
|
+
for elem in expr.expressions:
|
|
623
|
+
flatten_expr(elem)
|
|
624
|
+
elif isinstance(expr, (exp.Tuple, exp.Paren)):
|
|
625
|
+
expressions = [expr.unnest()] if isinstance(expr, exp.Paren) else expr.expressions
|
|
626
|
+
for elem in expressions:
|
|
627
|
+
flatten_expr(elem)
|
|
628
|
+
else:
|
|
629
|
+
result.append(self._validate_config_expression(expr))
|
|
630
|
+
|
|
631
|
+
flatten_expr(value_expr)
|
|
632
|
+
return result
|
sqlmesh/core/node.py
CHANGED
|
@@ -153,6 +153,101 @@ class IntervalUnit(str, Enum):
|
|
|
153
153
|
return self.seconds * 1000
|
|
154
154
|
|
|
155
155
|
|
|
156
|
+
class DbtNodeInfo(PydanticModel):
|
|
157
|
+
"""
|
|
158
|
+
Represents dbt-specific model information set by the dbt loader and intended to be made available at the Snapshot level
|
|
159
|
+
(as opposed to hidden within the individual model jinja macro registries).
|
|
160
|
+
|
|
161
|
+
This allows for things like injecting implementations of variables / functions into the Jinja context that are compatible with
|
|
162
|
+
their dbt equivalents but are backed by the sqlmesh snapshots in any given plan / environment
|
|
163
|
+
"""
|
|
164
|
+
|
|
165
|
+
unique_id: str
|
|
166
|
+
"""This is the node/resource name/unique_id that's used as the node key in the dbt manifest.
|
|
167
|
+
It's prefixed by the resource type and is exposed in context variables like {{ selected_resources }}.
|
|
168
|
+
|
|
169
|
+
Examples:
|
|
170
|
+
- test.jaffle_shop.unique_stg_orders_order_id.e3b841c71a
|
|
171
|
+
- seed.jaffle_shop.raw_payments
|
|
172
|
+
- model.jaffle_shop.stg_orders
|
|
173
|
+
"""
|
|
174
|
+
|
|
175
|
+
name: str
|
|
176
|
+
"""Name of this object in the dbt global namespace, used by things like {{ ref() }} calls.
|
|
177
|
+
|
|
178
|
+
Examples:
|
|
179
|
+
- unique_stg_orders_order_id
|
|
180
|
+
- raw_payments
|
|
181
|
+
- stg_orders
|
|
182
|
+
"""
|
|
183
|
+
|
|
184
|
+
fqn: str
|
|
185
|
+
"""Used for selectors in --select/--exclude.
|
|
186
|
+
Takes the filesystem into account so may be structured differently to :unique_id.
|
|
187
|
+
|
|
188
|
+
Examples:
|
|
189
|
+
- jaffle_shop.staging.unique_stg_orders_order_id
|
|
190
|
+
- jaffle_shop.raw_payments
|
|
191
|
+
- jaffle_shop.staging.stg_orders
|
|
192
|
+
"""
|
|
193
|
+
|
|
194
|
+
alias: t.Optional[str] = None
|
|
195
|
+
"""This is dbt's way of overriding the _physical table_ a model is written to.
|
|
196
|
+
|
|
197
|
+
It's used in the following situation:
|
|
198
|
+
- Say you have two models, "stg_customers" and "customers"
|
|
199
|
+
- You want "stg_customers" to be written to the "staging" schema as eg "staging.customers" - NOT "staging.stg_customers"
|
|
200
|
+
- But you cant rename the file to "customers" because it will conflict with your other model file "customers"
|
|
201
|
+
- Even if you put it in a different folder, eg "staging/customers.sql" - dbt still has a global namespace so it will conflict
|
|
202
|
+
when you try to do something like "{{ ref('customers') }}"
|
|
203
|
+
- So dbt's solution to this problem is to keep calling it "stg_customers" at the dbt project/model level,
|
|
204
|
+
but allow overriding the physical table to "customers" via something like "{{ config(alias='customers', schema='staging') }}"
|
|
205
|
+
|
|
206
|
+
Note that if :alias is set, it does *not* replace :name at the model level and cannot be used interchangably with :name.
|
|
207
|
+
It also does not affect the :fqn or :unique_id. It's just used to override :name when it comes time to generate the physical table name.
|
|
208
|
+
"""
|
|
209
|
+
|
|
210
|
+
@model_validator(mode="after")
|
|
211
|
+
def post_init(self) -> Self:
|
|
212
|
+
# by default, dbt sets alias to the same as :name
|
|
213
|
+
# however, we only want to include :alias if it is actually different / actually providing an override
|
|
214
|
+
if self.alias == self.name:
|
|
215
|
+
self.alias = None
|
|
216
|
+
return self
|
|
217
|
+
|
|
218
|
+
def to_expression(self) -> exp.Expression:
|
|
219
|
+
"""Produce a SQLGlot expression representing this object, for use in things like the model/audit definition renderers"""
|
|
220
|
+
return exp.tuple_(
|
|
221
|
+
*(
|
|
222
|
+
exp.PropertyEQ(this=exp.var(k), expression=exp.Literal.string(v))
|
|
223
|
+
for k, v in sorted(self.model_dump(exclude_none=True).items())
|
|
224
|
+
)
|
|
225
|
+
)
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
class DbtInfoMixin:
|
|
229
|
+
"""This mixin encapsulates properties that only exist for dbt compatibility and are otherwise not required
|
|
230
|
+
for native projects"""
|
|
231
|
+
|
|
232
|
+
@property
|
|
233
|
+
def dbt_node_info(self) -> t.Optional[DbtNodeInfo]:
|
|
234
|
+
raise NotImplementedError()
|
|
235
|
+
|
|
236
|
+
@property
|
|
237
|
+
def dbt_unique_id(self) -> t.Optional[str]:
|
|
238
|
+
"""Used for compatibility with jinja context variables such as {{ selected_resources }}"""
|
|
239
|
+
if self.dbt_node_info:
|
|
240
|
+
return self.dbt_node_info.unique_id
|
|
241
|
+
return None
|
|
242
|
+
|
|
243
|
+
@property
|
|
244
|
+
def dbt_fqn(self) -> t.Optional[str]:
|
|
245
|
+
"""Used in the selector engine for compatibility with selectors that select models by dbt fqn"""
|
|
246
|
+
if self.dbt_node_info:
|
|
247
|
+
return self.dbt_node_info.fqn
|
|
248
|
+
return None
|
|
249
|
+
|
|
250
|
+
|
|
156
251
|
# this must be sorted in descending order
|
|
157
252
|
INTERVAL_SECONDS = {
|
|
158
253
|
IntervalUnit.YEAR: 60 * 60 * 24 * 365,
|
|
@@ -165,7 +260,7 @@ INTERVAL_SECONDS = {
|
|
|
165
260
|
}
|
|
166
261
|
|
|
167
262
|
|
|
168
|
-
class _Node(PydanticModel):
|
|
263
|
+
class _Node(DbtInfoMixin, PydanticModel):
|
|
169
264
|
"""
|
|
170
265
|
Node is the core abstraction for entity that can be executed within the scheduler.
|
|
171
266
|
|
|
@@ -199,6 +294,7 @@ class _Node(PydanticModel):
|
|
|
199
294
|
interval_unit_: t.Optional[IntervalUnit] = Field(alias="interval_unit", default=None)
|
|
200
295
|
tags: t.List[str] = []
|
|
201
296
|
stamp: t.Optional[str] = None
|
|
297
|
+
dbt_node_info_: t.Optional[DbtNodeInfo] = Field(alias="dbt_node_info", default=None)
|
|
202
298
|
_path: t.Optional[Path] = None
|
|
203
299
|
_data_hash: t.Optional[str] = None
|
|
204
300
|
_metadata_hash: t.Optional[str] = None
|
|
@@ -445,6 +541,10 @@ class _Node(PydanticModel):
|
|
|
445
541
|
"""Return True if this is an audit node"""
|
|
446
542
|
return False
|
|
447
543
|
|
|
544
|
+
@property
|
|
545
|
+
def dbt_node_info(self) -> t.Optional[DbtNodeInfo]:
|
|
546
|
+
return self.dbt_node_info_
|
|
547
|
+
|
|
448
548
|
|
|
449
549
|
class NodeType(str, Enum):
|
|
450
550
|
MODEL = "model"
|
sqlmesh/core/plan/builder.py
CHANGED
|
@@ -65,6 +65,9 @@ class PlanBuilder:
|
|
|
65
65
|
restate_models: A list of models for which the data should be restated for the time range
|
|
66
66
|
specified in this plan. Note: models defined outside SQLMesh (external) won't be a part
|
|
67
67
|
of the restatement.
|
|
68
|
+
restate_all_snapshots: If restatements are present, this flag indicates whether or not the intervals
|
|
69
|
+
being restated should be cleared from state for other versions of this model (typically, versions that are present in other environments).
|
|
70
|
+
If set to None, the default behaviour is to not clear anything unless the target environment is prod.
|
|
68
71
|
backfill_models: A list of fully qualified model names for which the data should be backfilled as part of this plan.
|
|
69
72
|
no_gaps: Whether to ensure that new snapshots for nodes that are already a
|
|
70
73
|
part of the target environment have no data gaps when compared against previous
|
|
@@ -103,6 +106,7 @@ class PlanBuilder:
|
|
|
103
106
|
execution_time: t.Optional[TimeLike] = None,
|
|
104
107
|
apply: t.Optional[t.Callable[[Plan], None]] = None,
|
|
105
108
|
restate_models: t.Optional[t.Iterable[str]] = None,
|
|
109
|
+
restate_all_snapshots: bool = False,
|
|
106
110
|
backfill_models: t.Optional[t.Iterable[str]] = None,
|
|
107
111
|
no_gaps: bool = False,
|
|
108
112
|
skip_backfill: bool = False,
|
|
@@ -129,6 +133,7 @@ class PlanBuilder:
|
|
|
129
133
|
end_override_per_model: t.Optional[t.Dict[str, datetime]] = None,
|
|
130
134
|
console: t.Optional[PlanBuilderConsole] = None,
|
|
131
135
|
user_provided_flags: t.Optional[t.Dict[str, UserProvidedFlags]] = None,
|
|
136
|
+
selected_models: t.Optional[t.Set[str]] = None,
|
|
132
137
|
):
|
|
133
138
|
self._context_diff = context_diff
|
|
134
139
|
self._no_gaps = no_gaps
|
|
@@ -153,6 +158,7 @@ class PlanBuilder:
|
|
|
153
158
|
self._auto_categorization_enabled = auto_categorization_enabled
|
|
154
159
|
self._include_unmodified = include_unmodified
|
|
155
160
|
self._restate_models = set(restate_models) if restate_models is not None else None
|
|
161
|
+
self._restate_all_snapshots = restate_all_snapshots
|
|
156
162
|
self._effective_from = effective_from
|
|
157
163
|
|
|
158
164
|
# note: this deliberately doesnt default to now() here.
|
|
@@ -169,6 +175,7 @@ class PlanBuilder:
|
|
|
169
175
|
self._console = console or get_console()
|
|
170
176
|
self._choices: t.Dict[SnapshotId, SnapshotChangeCategory] = {}
|
|
171
177
|
self._user_provided_flags = user_provided_flags
|
|
178
|
+
self._selected_models = selected_models
|
|
172
179
|
self._explain = explain
|
|
173
180
|
|
|
174
181
|
self._start = start
|
|
@@ -275,7 +282,6 @@ class PlanBuilder:
|
|
|
275
282
|
if self._latest_plan:
|
|
276
283
|
return self._latest_plan
|
|
277
284
|
|
|
278
|
-
self._ensure_no_new_snapshots_with_restatements()
|
|
279
285
|
self._ensure_new_env_with_changes()
|
|
280
286
|
self._ensure_valid_date_range()
|
|
281
287
|
self._ensure_no_broken_references()
|
|
@@ -336,7 +342,9 @@ class PlanBuilder:
|
|
|
336
342
|
directly_modified=directly_modified,
|
|
337
343
|
indirectly_modified=indirectly_modified,
|
|
338
344
|
deployability_index=deployability_index,
|
|
345
|
+
selected_models_to_restate=self._restate_models,
|
|
339
346
|
restatements=restatements,
|
|
347
|
+
restate_all_snapshots=self._restate_all_snapshots,
|
|
340
348
|
start_override_per_model=self._start_override_per_model,
|
|
341
349
|
end_override_per_model=end_override_per_model,
|
|
342
350
|
selected_models_to_backfill=self._backfill_models,
|
|
@@ -347,6 +355,7 @@ class PlanBuilder:
|
|
|
347
355
|
ensure_finalized_snapshots=self._ensure_finalized_snapshots,
|
|
348
356
|
ignore_cron=self._ignore_cron,
|
|
349
357
|
user_provided_flags=self._user_provided_flags,
|
|
358
|
+
selected_models=self._selected_models,
|
|
350
359
|
)
|
|
351
360
|
self._latest_plan = plan
|
|
352
361
|
return plan
|
|
@@ -671,6 +680,14 @@ class PlanBuilder:
|
|
|
671
680
|
if mode == AutoCategorizationMode.FULL:
|
|
672
681
|
snapshot.categorize_as(SnapshotChangeCategory.BREAKING, forward_only)
|
|
673
682
|
elif self._context_diff.indirectly_modified(snapshot.name):
|
|
683
|
+
if snapshot.is_materialized_view and not forward_only:
|
|
684
|
+
# We categorize changes as breaking to allow for instantaneous switches in a virtual layer.
|
|
685
|
+
# Otherwise, there might be a potentially long downtime during MVs recreation.
|
|
686
|
+
# In the case of forward-only changes this optimization is not applicable because we want to continue
|
|
687
|
+
# using the same (existing) table version.
|
|
688
|
+
snapshot.categorize_as(SnapshotChangeCategory.INDIRECT_BREAKING, forward_only)
|
|
689
|
+
return
|
|
690
|
+
|
|
674
691
|
all_upstream_forward_only = set()
|
|
675
692
|
all_upstream_categories = set()
|
|
676
693
|
direct_parent_categories = set()
|
|
@@ -855,15 +872,6 @@ class PlanBuilder:
|
|
|
855
872
|
f"""Removed {broken_references_msg} are referenced in '{snapshot.name}'. Please remove broken references before proceeding."""
|
|
856
873
|
)
|
|
857
874
|
|
|
858
|
-
def _ensure_no_new_snapshots_with_restatements(self) -> None:
|
|
859
|
-
if self._restate_models is not None and (
|
|
860
|
-
self._context_diff.new_snapshots or self._context_diff.modified_snapshots
|
|
861
|
-
):
|
|
862
|
-
raise PlanError(
|
|
863
|
-
"Model changes and restatements can't be a part of the same plan. "
|
|
864
|
-
"Revert or apply changes before proceeding with restatements."
|
|
865
|
-
)
|
|
866
|
-
|
|
867
875
|
def _ensure_new_env_with_changes(self) -> None:
|
|
868
876
|
if (
|
|
869
877
|
self._is_dev
|
sqlmesh/core/plan/common.py
CHANGED
|
@@ -1,16 +1,40 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
+
import typing as t
|
|
3
|
+
import logging
|
|
4
|
+
from dataclasses import dataclass, field
|
|
2
5
|
|
|
3
|
-
from sqlmesh.core.
|
|
6
|
+
from sqlmesh.core.state_sync import StateReader
|
|
7
|
+
from sqlmesh.core.snapshot import Snapshot, SnapshotId, SnapshotIdAndVersion, SnapshotNameVersion
|
|
8
|
+
from sqlmesh.core.snapshot.definition import Interval
|
|
9
|
+
from sqlmesh.utils.dag import DAG
|
|
10
|
+
from sqlmesh.utils.date import now_timestamp
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
4
13
|
|
|
5
14
|
|
|
6
15
|
def should_force_rebuild(old: Snapshot, new: Snapshot) -> bool:
|
|
7
16
|
if new.is_view and new.is_indirect_non_breaking and not new.is_forward_only:
|
|
8
|
-
# View models always need to be rebuilt to reflect updated upstream dependencies
|
|
17
|
+
# View models always need to be rebuilt to reflect updated upstream dependencies
|
|
18
|
+
return True
|
|
19
|
+
if new.is_seed and not (
|
|
20
|
+
new.is_metadata
|
|
21
|
+
and new.previous_version
|
|
22
|
+
and new.previous_version.snapshot_id(new.name) == old.snapshot_id
|
|
23
|
+
):
|
|
24
|
+
# Seed models always need to be rebuilt to reflect changes in the seed file
|
|
25
|
+
# Unless only their metadata has been updated (eg description added) and the seed file has not been touched
|
|
9
26
|
return True
|
|
10
27
|
return is_breaking_kind_change(old, new)
|
|
11
28
|
|
|
12
29
|
|
|
13
30
|
def is_breaking_kind_change(old: Snapshot, new: Snapshot) -> bool:
|
|
31
|
+
if new.is_model != old.is_model:
|
|
32
|
+
# If one is a model and the other isn't, then we need to rebuild
|
|
33
|
+
return True
|
|
34
|
+
if not new.is_model or not old.is_model:
|
|
35
|
+
# If neither are models, then we don't need to rebuild
|
|
36
|
+
# Note that the remaining checks only apply to model snapshots
|
|
37
|
+
return False
|
|
14
38
|
if old.virtual_environment_mode != new.virtual_environment_mode:
|
|
15
39
|
# If the virtual environment mode has changed, then we need to rebuild
|
|
16
40
|
return True
|
|
@@ -24,3 +48,176 @@ def is_breaking_kind_change(old: Snapshot, new: Snapshot) -> bool:
|
|
|
24
48
|
# If the partitioning hasn't changed, then we don't need to rebuild
|
|
25
49
|
return False
|
|
26
50
|
return True
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@dataclass
|
|
54
|
+
class SnapshotIntervalClearRequest:
|
|
55
|
+
# affected snapshot
|
|
56
|
+
snapshot: SnapshotIdAndVersion
|
|
57
|
+
|
|
58
|
+
# which interval to clear
|
|
59
|
+
interval: Interval
|
|
60
|
+
|
|
61
|
+
# which environments this snapshot is currently promoted
|
|
62
|
+
# note that this can be empty if the snapshot exists because its ttl has not expired
|
|
63
|
+
# but it is not part of any particular environment
|
|
64
|
+
environment_names: t.Set[str] = field(default_factory=set)
|
|
65
|
+
|
|
66
|
+
@property
|
|
67
|
+
def snapshot_id(self) -> SnapshotId:
|
|
68
|
+
return self.snapshot.snapshot_id
|
|
69
|
+
|
|
70
|
+
@property
|
|
71
|
+
def sorted_environment_names(self) -> t.List[str]:
|
|
72
|
+
return list(sorted(self.environment_names))
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def identify_restatement_intervals_across_snapshot_versions(
|
|
76
|
+
state_reader: StateReader,
|
|
77
|
+
prod_restatements: t.Dict[str, Interval],
|
|
78
|
+
disable_restatement_models: t.Set[str],
|
|
79
|
+
loaded_snapshots: t.Dict[SnapshotId, Snapshot],
|
|
80
|
+
current_ts: t.Optional[int] = None,
|
|
81
|
+
) -> t.Dict[SnapshotId, SnapshotIntervalClearRequest]:
|
|
82
|
+
"""
|
|
83
|
+
Given a map of snapshot names + intervals to restate in prod:
|
|
84
|
+
- Look up matching snapshots (match based on name - regardless of version, to get all versions)
|
|
85
|
+
- For each match, also match downstream snapshots in each dev environment while filtering out models that have restatement disabled
|
|
86
|
+
- Return a list of all snapshots that are affected + the interval that needs to be cleared for each
|
|
87
|
+
|
|
88
|
+
The goal here is to produce a list of intervals to invalidate across all dev snapshots so that a subsequent plan or
|
|
89
|
+
cadence run in those environments causes the intervals to be repopulated.
|
|
90
|
+
"""
|
|
91
|
+
if not prod_restatements:
|
|
92
|
+
return {}
|
|
93
|
+
|
|
94
|
+
# Although :loaded_snapshots is sourced from RestatementStage.all_snapshots, since the only time we ever need
|
|
95
|
+
# to clear intervals across all environments is for prod, the :loaded_snapshots here are always from prod
|
|
96
|
+
prod_name_versions: t.Set[SnapshotNameVersion] = {
|
|
97
|
+
s.name_version for s in loaded_snapshots.values()
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
snapshot_intervals_to_clear: t.Dict[SnapshotId, SnapshotIntervalClearRequest] = {}
|
|
101
|
+
|
|
102
|
+
for env_summary in state_reader.get_environments_summary():
|
|
103
|
+
# Fetch the full environment object one at a time to avoid loading all environments into memory at once
|
|
104
|
+
env = state_reader.get_environment(env_summary.name)
|
|
105
|
+
if not env:
|
|
106
|
+
logger.warning("Environment %s not found", env_summary.name)
|
|
107
|
+
continue
|
|
108
|
+
|
|
109
|
+
snapshots_by_name = {s.name: s.table_info for s in env.snapshots}
|
|
110
|
+
|
|
111
|
+
# We dont just restate matching snapshots, we also have to restate anything downstream of them
|
|
112
|
+
# so that if A gets restated in prod and dev has A <- B <- C, B and C get restated in dev
|
|
113
|
+
env_dag = DAG({s.name: {p.name for p in s.parents} for s in env.snapshots})
|
|
114
|
+
|
|
115
|
+
for restate_snapshot_name, interval in prod_restatements.items():
|
|
116
|
+
if restate_snapshot_name not in snapshots_by_name:
|
|
117
|
+
# snapshot is not promoted in this environment
|
|
118
|
+
continue
|
|
119
|
+
|
|
120
|
+
affected_snapshot_names = [
|
|
121
|
+
x
|
|
122
|
+
for x in ([restate_snapshot_name] + env_dag.downstream(restate_snapshot_name))
|
|
123
|
+
if x not in disable_restatement_models
|
|
124
|
+
]
|
|
125
|
+
|
|
126
|
+
for affected_snapshot_name in affected_snapshot_names:
|
|
127
|
+
affected_snapshot = snapshots_by_name[affected_snapshot_name]
|
|
128
|
+
|
|
129
|
+
# Don't clear intervals for a dev snapshot if it shares the same physical version with prod.
|
|
130
|
+
# Otherwise, prod will be affected by what should be a dev operation
|
|
131
|
+
if affected_snapshot.name_version in prod_name_versions:
|
|
132
|
+
continue
|
|
133
|
+
|
|
134
|
+
clear_request = snapshot_intervals_to_clear.get(affected_snapshot.snapshot_id)
|
|
135
|
+
if not clear_request:
|
|
136
|
+
clear_request = SnapshotIntervalClearRequest(
|
|
137
|
+
snapshot=affected_snapshot.id_and_version, interval=interval
|
|
138
|
+
)
|
|
139
|
+
snapshot_intervals_to_clear[affected_snapshot.snapshot_id] = clear_request
|
|
140
|
+
|
|
141
|
+
clear_request.environment_names |= set([env.name])
|
|
142
|
+
|
|
143
|
+
# snapshot_intervals_to_clear now contains the entire hierarchy of affected snapshots based
|
|
144
|
+
# on building the DAG for each environment and including downstream snapshots
|
|
145
|
+
# but, what if there are affected snapshots that arent part of any environment?
|
|
146
|
+
unique_snapshot_names = set(snapshot_id.name for snapshot_id in snapshot_intervals_to_clear)
|
|
147
|
+
|
|
148
|
+
current_ts = current_ts or now_timestamp()
|
|
149
|
+
all_matching_non_prod_snapshots = {
|
|
150
|
+
s.snapshot_id: s
|
|
151
|
+
for s in state_reader.get_snapshots_by_names(
|
|
152
|
+
snapshot_names=unique_snapshot_names, current_ts=current_ts, exclude_expired=True
|
|
153
|
+
)
|
|
154
|
+
# Don't clear intervals for a snapshot if it shares the same physical version with prod.
|
|
155
|
+
# Otherwise, prod will be affected by what should be a dev operation
|
|
156
|
+
if s.name_version not in prod_name_versions
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
# identify the ones that we havent picked up yet, which are the ones that dont exist in any environment
|
|
160
|
+
if remaining_snapshot_ids := set(all_matching_non_prod_snapshots).difference(
|
|
161
|
+
snapshot_intervals_to_clear
|
|
162
|
+
):
|
|
163
|
+
# these snapshot id's exist in isolation and may be related to a downstream dependency of the :prod_restatements,
|
|
164
|
+
# rather than directly related, so we can't simply look up the interval to clear based on :prod_restatements.
|
|
165
|
+
# To figure out the interval that should be cleared, we can match to the existing list based on name
|
|
166
|
+
# and conservatively take the widest interval that shows up
|
|
167
|
+
snapshot_name_to_widest_interval: t.Dict[str, Interval] = {}
|
|
168
|
+
for s_id, clear_request in snapshot_intervals_to_clear.items():
|
|
169
|
+
current_start, current_end = snapshot_name_to_widest_interval.get(
|
|
170
|
+
s_id.name, clear_request.interval
|
|
171
|
+
)
|
|
172
|
+
next_start, next_end = clear_request.interval
|
|
173
|
+
|
|
174
|
+
next_start = min(current_start, next_start)
|
|
175
|
+
next_end = max(current_end, next_end)
|
|
176
|
+
|
|
177
|
+
snapshot_name_to_widest_interval[s_id.name] = (next_start, next_end)
|
|
178
|
+
|
|
179
|
+
for remaining_snapshot_id in remaining_snapshot_ids:
|
|
180
|
+
remaining_snapshot = all_matching_non_prod_snapshots[remaining_snapshot_id]
|
|
181
|
+
snapshot_intervals_to_clear[remaining_snapshot_id] = SnapshotIntervalClearRequest(
|
|
182
|
+
snapshot=remaining_snapshot,
|
|
183
|
+
interval=snapshot_name_to_widest_interval[remaining_snapshot_id.name],
|
|
184
|
+
)
|
|
185
|
+
|
|
186
|
+
# for any affected full_history_restatement_only snapshots, we need to widen the intervals being restated to
|
|
187
|
+
# include the whole time range for that snapshot. This requires a call to state to load the full snapshot record,
|
|
188
|
+
# so we only do it if necessary
|
|
189
|
+
full_history_restatement_snapshot_ids = [
|
|
190
|
+
# FIXME: full_history_restatement_only is just one indicator that the snapshot can only be fully refreshed, the other one is Model.depends_on_self
|
|
191
|
+
# however, to figure out depends_on_self, we have to render all the model queries which, alongside having to fetch full snapshots from state,
|
|
192
|
+
# is problematic in secure environments that are deliberately isolated from arbitrary user code (since rendering a query may require user macros to be present)
|
|
193
|
+
# So for now, these are not considered
|
|
194
|
+
s_id
|
|
195
|
+
for s_id, s in snapshot_intervals_to_clear.items()
|
|
196
|
+
if s.snapshot.full_history_restatement_only
|
|
197
|
+
]
|
|
198
|
+
if full_history_restatement_snapshot_ids:
|
|
199
|
+
# only load full snapshot records that we havent already loaded
|
|
200
|
+
additional_snapshots = state_reader.get_snapshots(
|
|
201
|
+
[
|
|
202
|
+
s.snapshot_id
|
|
203
|
+
for s in full_history_restatement_snapshot_ids
|
|
204
|
+
if s.snapshot_id not in loaded_snapshots
|
|
205
|
+
]
|
|
206
|
+
)
|
|
207
|
+
|
|
208
|
+
all_snapshots = loaded_snapshots | additional_snapshots
|
|
209
|
+
|
|
210
|
+
for full_snapshot_id in full_history_restatement_snapshot_ids:
|
|
211
|
+
full_snapshot = all_snapshots[full_snapshot_id]
|
|
212
|
+
intervals_to_clear = snapshot_intervals_to_clear[full_snapshot_id]
|
|
213
|
+
|
|
214
|
+
original_start, original_end = intervals_to_clear.interval
|
|
215
|
+
|
|
216
|
+
# get_removal_interval() widens intervals if necessary
|
|
217
|
+
new_interval = full_snapshot.get_removal_interval(
|
|
218
|
+
start=original_start, end=original_end
|
|
219
|
+
)
|
|
220
|
+
|
|
221
|
+
intervals_to_clear.interval = new_interval
|
|
222
|
+
|
|
223
|
+
return snapshot_intervals_to_clear
|