sqlmesh 0.225.0__py3-none-any.whl → 0.227.2.dev6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlmesh/__init__.py +10 -2
- sqlmesh/_version.py +2 -2
- sqlmesh/core/config/connection.py +10 -5
- sqlmesh/core/config/loader.py +1 -0
- sqlmesh/core/context.py +73 -8
- sqlmesh/core/engine_adapter/base.py +12 -7
- sqlmesh/core/engine_adapter/fabric.py +1 -2
- sqlmesh/core/engine_adapter/mssql.py +5 -2
- sqlmesh/core/engine_adapter/trino.py +1 -1
- sqlmesh/core/lineage.py +1 -0
- sqlmesh/core/linter/rules/builtin.py +15 -0
- sqlmesh/core/loader.py +4 -0
- sqlmesh/core/model/kind.py +2 -2
- sqlmesh/core/plan/definition.py +9 -7
- sqlmesh/core/renderer.py +7 -8
- sqlmesh/core/scheduler.py +45 -15
- sqlmesh/core/signal.py +35 -14
- sqlmesh/core/snapshot/definition.py +18 -12
- sqlmesh/core/snapshot/evaluator.py +31 -17
- sqlmesh/core/state_sync/db/snapshot.py +6 -1
- sqlmesh/core/table_diff.py +2 -2
- sqlmesh/core/test/definition.py +5 -3
- sqlmesh/core/test/discovery.py +4 -0
- sqlmesh/dbt/builtin.py +9 -11
- sqlmesh/dbt/column.py +17 -5
- sqlmesh/dbt/common.py +4 -2
- sqlmesh/dbt/context.py +2 -0
- sqlmesh/dbt/loader.py +15 -2
- sqlmesh/dbt/manifest.py +3 -1
- sqlmesh/dbt/model.py +13 -1
- sqlmesh/dbt/profile.py +3 -3
- sqlmesh/dbt/target.py +9 -4
- sqlmesh/utils/date.py +1 -1
- sqlmesh/utils/pydantic.py +6 -6
- sqlmesh/utils/windows.py +13 -3
- {sqlmesh-0.225.0.dist-info → sqlmesh-0.227.2.dev6.dist-info}/METADATA +2 -2
- {sqlmesh-0.225.0.dist-info → sqlmesh-0.227.2.dev6.dist-info}/RECORD +43 -43
- sqlmesh_dbt/cli.py +26 -1
- sqlmesh_dbt/operations.py +8 -2
- {sqlmesh-0.225.0.dist-info → sqlmesh-0.227.2.dev6.dist-info}/WHEEL +0 -0
- {sqlmesh-0.225.0.dist-info → sqlmesh-0.227.2.dev6.dist-info}/entry_points.txt +0 -0
- {sqlmesh-0.225.0.dist-info → sqlmesh-0.227.2.dev6.dist-info}/licenses/LICENSE +0 -0
- {sqlmesh-0.225.0.dist-info → sqlmesh-0.227.2.dev6.dist-info}/top_level.txt +0 -0
sqlmesh/core/scheduler.py
CHANGED

@@ -352,7 +352,7 @@ class Scheduler:
             )
             for snapshot, intervals in merged_intervals.items()
         }
-        snapshot_batches = {}
+        snapshot_batches: t.Dict[Snapshot, Intervals] = {}
         all_unready_intervals: t.Dict[str, set[Interval]] = {}
         for snapshot_id in dag:
             if snapshot_id not in snapshot_intervals:
@@ -364,6 +364,14 @@ class Scheduler:

             adapter = self.snapshot_evaluator.get_adapter(snapshot.model_gateway)

+            parent_intervals: Intervals = []
+            for parent_id in snapshot.parents:
+                parent_snapshot, _ = snapshot_intervals.get(parent_id, (None, []))
+                if not parent_snapshot or parent_snapshot.is_external:
+                    continue
+
+                parent_intervals.extend(snapshot_batches[parent_snapshot])
+
             context = ExecutionContext(
                 adapter,
                 self.snapshots_by_name,
@@ -371,6 +379,7 @@ class Scheduler:
                 default_dialect=adapter.dialect,
                 default_catalog=self.default_catalog,
                 is_restatement=is_restatement,
+                parent_intervals=parent_intervals,
             )

             intervals = self._check_ready_intervals(
@@ -538,6 +547,10 @@ class Scheduler:
                 execution_time=execution_time,
             )
         else:
+            # If batch_index > 0, then the target table must exist since the first batch would have created it
+            target_table_exists = (
+                snapshot.snapshot_id not in snapshots_to_create or node.batch_index > 0
+            )
             audit_results = self.evaluate(
                 snapshot=snapshot,
                 environment_naming_info=environment_naming_info,
@@ -548,7 +561,7 @@ class Scheduler:
                 batch_index=node.batch_index,
                 allow_destructive_snapshots=allow_destructive_snapshots,
                 allow_additive_snapshots=allow_additive_snapshots,
-                target_table_exists=
+                target_table_exists=target_table_exists,
                 selected_models=selected_models,
             )

@@ -646,6 +659,7 @@ class Scheduler:
        }
        snapshots_to_create = snapshots_to_create or set()
        original_snapshots_to_create = snapshots_to_create.copy()
+        upstream_dependencies_cache: t.Dict[SnapshotId, t.Set[SchedulingUnit]] = {}

        snapshot_dag = snapshot_dag or snapshots_to_dag(batches)
        dag = DAG[SchedulingUnit]()
@@ -657,12 +671,15 @@ class Scheduler:
            snapshot = self.snapshots_by_name[snapshot_id.name]
            intervals = intervals_per_snapshot.get(snapshot.name, [])

-            upstream_dependencies: t.
+            upstream_dependencies: t.Set[SchedulingUnit] = set()

            for p_sid in snapshot.parents:
-                upstream_dependencies.
+                upstream_dependencies.update(
                    self._find_upstream_dependencies(
-                        p_sid,
+                        p_sid,
+                        intervals_per_snapshot,
+                        original_snapshots_to_create,
+                        upstream_dependencies_cache,
                    )
                )

@@ -713,29 +730,42 @@ class Scheduler:
        parent_sid: SnapshotId,
        intervals_per_snapshot: t.Dict[str, Intervals],
        snapshots_to_create: t.Set[SnapshotId],
-
+        cache: t.Dict[SnapshotId, t.Set[SchedulingUnit]],
+    ) -> t.Set[SchedulingUnit]:
        if parent_sid not in self.snapshots:
-            return
+            return set()
+        if parent_sid in cache:
+            return cache[parent_sid]

        p_intervals = intervals_per_snapshot.get(parent_sid.name, [])

+        parent_node: t.Optional[SchedulingUnit] = None
        if p_intervals:
            if len(p_intervals) > 1:
-
-
-
-
-
+                parent_node = DummyNode(snapshot_name=parent_sid.name)
+            else:
+                interval = p_intervals[0]
+                parent_node = EvaluateNode(
+                    snapshot_name=parent_sid.name, interval=interval, batch_index=0
+                )
+        elif parent_sid in snapshots_to_create:
+            parent_node = CreateNode(snapshot_name=parent_sid.name)
+
+        if parent_node is not None:
+            cache[parent_sid] = {parent_node}
+            return {parent_node}
+
        # This snapshot has no intervals and doesn't need creation which means
        # that it can be a transitive dependency
-        transitive_deps: t.
+        transitive_deps: t.Set[SchedulingUnit] = set()
        parent_snapshot = self.snapshots[parent_sid]
        for grandparent_sid in parent_snapshot.parents:
-            transitive_deps.
+            transitive_deps.update(
                self._find_upstream_dependencies(
-                    grandparent_sid, intervals_per_snapshot, snapshots_to_create
+                    grandparent_sid, intervals_per_snapshot, snapshots_to_create, cache
                )
            )
+        cache[parent_sid] = transitive_deps
        return transitive_deps

    def _run_or_audit(
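
Note: the new upstream_dependencies_cache turns _find_upstream_dependencies into a memoized recursion, so each parent's scheduling units are resolved once and reused when the same parent is reached through another child. A minimal standalone sketch of that pattern, using hypothetical names rather than the sqlmesh API:

import typing as t

graph: t.Dict[str, t.List[str]] = {"c": ["a", "b"], "b": ["a"], "a": []}

def upstream(node: str, cache: t.Dict[str, t.Set[str]]) -> t.Set[str]:
    # Return the cached result if this node was already resolved via another path.
    if node in cache:
        return cache[node]
    deps: t.Set[str] = set()
    for parent in graph[node]:
        deps.add(parent)
        deps.update(upstream(parent, cache))
    cache[node] = deps
    return deps

cache: t.Dict[str, t.Set[str]] = {}
print(upstream("c", cache))  # {'a', 'b'} (set order may vary)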
sqlmesh/core/signal.py
CHANGED

@@ -2,6 +2,7 @@ from __future__ import annotations

 import typing as t
 from sqlmesh.utils import UniqueKeyDict, registry_decorator
+from sqlmesh.utils.errors import MissingSourceError

 if t.TYPE_CHECKING:
     from sqlmesh.core.context import ExecutionContext
@@ -42,7 +43,16 @@ SignalRegistry = UniqueKeyDict[str, signal]


 @signal()
-def freshness(
+def freshness(
+    batch: DatetimeRanges,
+    snapshot: Snapshot,
+    context: ExecutionContext,
+) -> bool:
+    """
+    Implements model freshness as a signal, i.e it considers this model to be fresh if:
+    - Any upstream SQLMesh model has available intervals to compute i.e is fresh
+    - Any upstream external model has been altered since the last time the model was evaluated
+    """
     adapter = context.engine_adapter
     if context.is_restatement or not adapter.SUPPORTS_METADATA_TABLE_LAST_MODIFIED_TS:
         return True
@@ -54,24 +64,35 @@ def freshness(batch: DatetimeRanges, snapshot: Snapshot, context: ExecutionConte
        if deployability_index.is_deployable(snapshot)
        else snapshot.dev_last_altered_ts
    )
+
    if not last_altered_ts:
        return True

    parent_snapshots = {context.snapshots[p.name] for p in snapshot.parents}
-
-
-
-
+
+    upstream_parent_snapshots = {p for p in parent_snapshots if not p.is_external}
+    external_parents = snapshot.node.depends_on - {p.name for p in upstream_parent_snapshots}
+
+    if context.parent_intervals:
+        # At least one upstream sqlmesh model has intervals to compute (i.e is fresh),
+        # so the current model is considered fresh too
        return True

-
-
-
-
-
-
+    if external_parents:
+        external_last_altered_timestamps = adapter.get_table_last_modified_ts(
+            list(external_parents)
+        )
+
+        if len(external_last_altered_timestamps) != len(external_parents):
+            raise MissingSourceError(
+                f"Expected {len(external_parents)} sources to be present, but got {len(external_last_altered_timestamps)}."
+            )
+
+        # Finding new data means that the upstream depedencies have been altered
+        # since the last time the model was evaluated
+        return any(
+            external_last_altered_ts > last_altered_ts
+            for external_last_altered_ts in external_last_altered_timestamps
        )
-    )

-
-    return upstream_dep_has_new_data
+    return False
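
Note: signals like the reworked freshness implementation above are plain functions registered with the @signal() decorator from this module; they receive the batch of candidate intervals (plus, optionally, snapshot and context) and return a boolean to admit or defer the batch. A hypothetical sketch of a user-defined signal, leaving the batch untyped to avoid guessing at import paths; the exact expected signature should be checked against the SQLMesh docs:

import typing as t
from pathlib import Path

from sqlmesh.core.signal import signal


@signal()
def upstream_file_ready(batch: t.Any) -> bool:
    # Hypothetical gate: only admit the batch once a marker file has landed.
    return Path("/tmp/upstream_ready.marker").exists()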
sqlmesh/core/snapshot/definition.py
CHANGED

@@ -2081,16 +2081,20 @@ def missing_intervals(
                continue
            snapshot_end_date = existing_interval_end

+        snapshot_start_date = max(
+            to_datetime(snapshot_start_date),
+            to_datetime(start_date(snapshot, snapshots, cache, relative_to=snapshot_end_date)),
+        )
+        if snapshot_start_date > to_datetime(snapshot_end_date):
+            continue
+
        missing_interval_end_date = snapshot_end_date
        node_end_date = snapshot.node.end
        if node_end_date and (to_datetime(node_end_date) < to_datetime(snapshot_end_date)):
            missing_interval_end_date = node_end_date

        intervals = snapshot.missing_intervals(
-
-            to_datetime(snapshot_start_date),
-            to_datetime(start_date(snapshot, snapshots, cache, relative_to=snapshot_end_date)),
-            ),
+            snapshot_start_date,
            missing_interval_end_date,
            execution_time=execution_time,
            deployability_index=deployability_index,
@@ -2295,14 +2299,16 @@ def start_date(
    if not isinstance(snapshots, dict):
        snapshots = {snapshot.snapshot_id: snapshot for snapshot in snapshots}

-
-
-
-        if parent in snapshots
-
-
-
-
+    parent_starts = [
+        start_date(snapshots[parent], snapshots, cache=cache, relative_to=relative_to)
+        for parent in snapshot.parents
+        if parent in snapshots
+    ]
+    earliest = (
+        min(parent_starts)
+        if parent_starts
+        else snapshot.node.cron_prev(snapshot.node.cron_floor(relative_to or now()))
+    )

    cache[key] = earliest
    return earliest
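
Note: the missing_intervals change above clamps the effective start to the later of the requested start and the model's computed start_date, and skips the snapshot entirely when that clamp moves the start past the end, so an inverted interval is never requested. The same idea in isolation, with plain datetimes and hypothetical values:

from datetime import datetime

requested_start = datetime(2024, 1, 1)
model_start = datetime(2024, 6, 1)   # e.g. derived from the model or its parents
interval_end = datetime(2024, 3, 1)

effective_start = max(requested_start, model_start)
if effective_start > interval_end:
    print("skip: nothing to backfill before the model's start")
else:
    print(f"backfill {effective_start} .. {interval_end}")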
sqlmesh/core/snapshot/evaluator.py
CHANGED

@@ -1021,6 +1021,11 @@ class SnapshotEvaluator:
        ):
            import pandas as pd

+            try:
+                first_query_or_df = next(queries_or_dfs)
+            except StopIteration:
+                return
+
            query_or_df = reduce(
                lambda a, b: (
                    pd.concat([a, b], ignore_index=True)  # type: ignore
@@ -1028,6 +1033,7 @@ class SnapshotEvaluator:
                    else a.union_all(b)  # type: ignore
                ),  # type: ignore
                queries_or_dfs,
+                first_query_or_df,
            )
            apply(query_or_df, index=0)
        else:
@@ -1593,14 +1599,14 @@ class SnapshotEvaluator:
        tables_by_gateway_and_schema: t.Dict[t.Union[str, None], t.Dict[exp.Table, set[str]]] = (
            defaultdict(lambda: defaultdict(set))
        )
-        snapshots_by_table_name: t.Dict[str, Snapshot] =
+        snapshots_by_table_name: t.Dict[exp.Table, t.Dict[str, Snapshot]] = defaultdict(dict)
        for snapshot in target_snapshots:
            if not snapshot.is_model or snapshot.is_symbolic:
                continue
            table = table_name_callable(snapshot)
            table_schema = d.schema_(table.db, catalog=table.catalog)
            tables_by_gateway_and_schema[snapshot.model_gateway][table_schema].add(table.name)
-            snapshots_by_table_name[table.name] = snapshot
+            snapshots_by_table_name[table_schema][table.name] = snapshot

        def _get_data_objects_in_schema(
            schema: exp.Table,
@@ -1613,23 +1619,25 @@ class SnapshotEvaluator:
            )

        with self.concurrent_context():
-
+            snapshot_id_to_obj: t.Dict[SnapshotId, DataObject] = {}
            # A schema can be shared across multiple engines, so we need to group tables by both gateway and schema
            for gateway, tables_by_schema in tables_by_gateway_and_schema.items():
-
-
-
-
-
-
-
-
-
-
-
-
+                schema_list = list(tables_by_schema.keys())
+                results = concurrent_apply_to_values(
+                    schema_list,
+                    lambda s: _get_data_objects_in_schema(
+                        schema=s, object_names=tables_by_schema.get(s), gateway=gateway
+                    ),
+                    self.ddl_concurrent_tasks,
+                )
+
+                for schema, objs in zip(schema_list, results):
+                    snapshots_by_name = snapshots_by_table_name.get(schema, {})
+                    for obj in objs:
+                        if obj.name in snapshots_by_name:
+                            snapshot_id_to_obj[snapshots_by_name[obj.name].snapshot_id] = obj

-        return
+        return snapshot_id_to_obj


 def _evaluation_strategy(snapshot: SnapshotInfoLike, adapter: EngineAdapter) -> EvaluationStrategy:
@@ -2185,7 +2193,13 @@ class MaterializableStrategy(PromotableStrategy, abc.ABC):
        if model.on_destructive_change.is_ignore or model.on_additive_change.is_ignore:
            # We need to identify the columns that are only in the source so we create an empty table with
            # the user query to determine that
-
+            temp_table_name = exp.table_(
+                "diff",
+                db=model.physical_schema,
+            )
+            with self.adapter.temp_table(
+                model.ctas_query(**render_kwargs), name=temp_table_name
+            ) as temp_table:
                source_columns = list(self.adapter.columns(temp_table))
        else:
            source_columns = None
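
Note: the StopIteration guard added above is the standard way to give functools.reduce an explicit initial value when the input is a generator that may be empty: pull the first item up front, bail out if there is none, and pass it as the initializer so the remaining items fold onto it. A small standalone illustration of the same pattern:

import typing as t
from functools import reduce


def concat_all(chunks: t.Iterator[str]) -> t.Optional[str]:
    # An empty generator would make reduce() raise TypeError without an initializer.
    try:
        first = next(chunks)
    except StopIteration:
        return None
    return reduce(lambda a, b: a + b, chunks, first)


print(concat_all(iter(["a", "b", "c"])))  # abc
print(concat_all(iter([])))               # None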
sqlmesh/core/state_sync/db/snapshot.py
CHANGED

@@ -185,7 +185,12 @@ class SnapshotState:
        promoted_snapshot_ids = {
            snapshot.snapshot_id
            for environment in environments
-            for snapshot in
+            for snapshot in (
+                environment.snapshots
+                if environment.finalized_ts is not None
+                # If the environment is not finalized, check both the current snapshots and the previous finalized snapshots
+                else [*environment.snapshots, *(environment.previous_finalized_snapshots or [])]
+            )
        }

        if promoted_snapshot_ids:
sqlmesh/core/table_diff.py
CHANGED

@@ -367,8 +367,8 @@ class TableDiff:
            column_type = matched_columns[name]
            qualified_column = exp.column(name, table)

-            if column_type.is_type(*exp.DataType.
-                return
+            if column_type.is_type(*exp.DataType.REAL_TYPES):
+                return self.adapter._normalize_decimal_value(qualified_column, self.decimals)
            if column_type.is_type(*exp.DataType.NESTED_TYPES):
                return self.adapter._normalize_nested_value(qualified_column)

sqlmesh/core/test/definition.py
CHANGED

@@ -454,6 +454,9 @@ class ModelTest(unittest.TestCase):
        query = outputs.get("query")
        partial = outputs.pop("partial", None)

+        if ctes is None and query is None:
+            _raise_error("Incomplete test, outputs must contain 'query' or 'ctes'", self.path)
+
        def _normalize_rows(
            values: t.List[Row] | t.Dict,
            name: str,
@@ -804,7 +807,7 @@ class PythonModelTest(ModelTest):
        actual_df.reset_index(drop=True, inplace=True)
        expected = self._create_df(values, columns=self.model.columns_to_types, partial=partial)

-        self.assert_equal(expected, actual_df, sort=
+        self.assert_equal(expected, actual_df, sort=True, partial=partial)

    def _execute_model(self) -> pd.DataFrame:
        """Executes the python model and returns a DataFrame."""
@@ -922,8 +925,7 @@ def generate_test(
            cte_output = test._execute(cte_query)
            ctes[cte.alias] = (
                pandas_timestamp_to_pydatetime(
-                    cte_output.apply(lambda col: col.map(_normalize_df_value)),
-                    cte_query.named_selects,
+                    df=cte_output.apply(lambda col: col.map(_normalize_df_value)),
                )
                .replace({np.nan: None})
                .to_dict(orient="records")
sqlmesh/core/test/discovery.py
CHANGED

@@ -20,6 +20,10 @@ class ModelTestMetadata(PydanticModel):
    def fully_qualified_test_name(self) -> str:
        return f"{self.path}::{self.test_name}"

+    @property
+    def model_name(self) -> str:
+        return self.body.get("model", "")
+
    def __hash__(self) -> int:
        return self.fully_qualified_test_name.__hash__()

sqlmesh/dbt/builtin.py
CHANGED

@@ -25,7 +25,7 @@ from sqlmesh.dbt.target import TARGET_TYPE_TO_CONFIG_CLASS
 from sqlmesh.dbt.util import DBT_VERSION
 from sqlmesh.utils import AttributeDict, debug_mode_enabled, yaml
 from sqlmesh.utils.date import now
-from sqlmesh.utils.errors import ConfigError
+from sqlmesh.utils.errors import ConfigError
 from sqlmesh.utils.jinja import JinjaMacroRegistry, MacroReference, MacroReturnVal

 logger = logging.getLogger(__name__)
@@ -381,18 +381,16 @@ def do_zip(*args: t.Any, default: t.Optional[t.Any] = None) -> t.Optional[t.Any]
        return default


-def as_bool(value:
-
-
-
-    raise MacroEvalError(f"Failed to convert '{value}' into boolean.")
+def as_bool(value: t.Any) -> t.Any:
+    # dbt's jinja TEXT_FILTERS just return the input value as is
+    # https://github.com/dbt-labs/dbt-common/blob/main/dbt_common/clients/jinja.py#L559
+    return value


 def as_number(value: str) -> t.Any:
-
-
-
-    raise MacroEvalError(f"Failed to convert '{value}' into number.")
+    # dbt's jinja TEXT_FILTERS just return the input value as is
+    # https://github.com/dbt-labs/dbt-common/blob/main/dbt_common/clients/jinja.py#L559
+    return value


 def _try_literal_eval(value: str) -> t.Any:
@@ -482,7 +480,7 @@ def create_builtin_globals(
    if variables is not None:
        builtin_globals["var"] = Var(variables)

-    builtin_globals["config"] = Config(jinja_globals.pop("config", {}))
+    builtin_globals["config"] = Config(jinja_globals.pop("config", {"tags": []}))

    deployability_index = (
        jinja_globals.get("deployability_index") or DeployabilityIndex.all_deployable()
sqlmesh/dbt/column.py
CHANGED

@@ -1,6 +1,7 @@
 from __future__ import annotations

 import typing as t
+import logging

 from sqlglot import exp, parse_one
 from sqlglot.helper import ensure_list
@@ -9,6 +10,8 @@ from sqlmesh.dbt.common import GeneralConfig
 from sqlmesh.utils.conversions import ensure_bool
 from sqlmesh.utils.pydantic import field_validator

+logger = logging.getLogger(__name__)
+

 def yaml_to_columns(
    yaml: t.Dict[str, ColumnConfig] | t.List[t.Dict[str, ColumnConfig]],
@@ -31,11 +34,20 @@ def column_types_to_sqlmesh(
    Returns:
        A dict of column name to exp.DataType
    """
-
-
-
-
-
+    col_types_to_sqlmesh: t.Dict[str, exp.DataType] = {}
+    for name, column in columns.items():
+        if column.enabled and column.data_type:
+            column_def = parse_one(
+                f"{name} {column.data_type}", into=exp.ColumnDef, dialect=dialect or ""
+            )
+            if column_def.args.get("constraints"):
+                logger.warning(
+                    f"Ignoring unsupported constraints for column '{name}' with definition '{column.data_type}'. Please refer to github.com/TobikoData/sqlmesh/issues/4717 for more information."
+                )
+            kind = column_def.kind
+            if kind:
+                col_types_to_sqlmesh[name] = kind
+    return col_types_to_sqlmesh


 def column_descriptions_to_sqlmesh(columns: t.Dict[str, ColumnConfig]) -> t.Dict[str, str]:
sqlmesh/dbt/common.py
CHANGED

@@ -46,7 +46,9 @@ def load_yaml(source: str | Path) -> t.Dict:
        raise ConfigError(f"{source}: {ex}" if isinstance(source, Path) else f"{ex}")


-def parse_meta(v: t.Dict[str, t.Any]) -> t.Dict[str, t.Any]:
+def parse_meta(v: t.Optional[t.Dict[str, t.Any]]) -> t.Dict[str, t.Any]:
+    if v is None:
+        return {}
    for key, value in v.items():
        if isinstance(value, str):
            v[key] = try_str_to_bool(value)
@@ -115,7 +117,7 @@ class GeneralConfig(DbtConfig):

    @field_validator("meta", mode="before")
    @classmethod
-    def _validate_meta(cls, v: t.Dict[str, t.Union[str, t.Any]]) -> t.Dict[str, t.Any]:
+    def _validate_meta(cls, v: t.Optional[t.Dict[str, t.Union[str, t.Any]]]) -> t.Dict[str, t.Any]:
        return parse_meta(v)

    _FIELD_UPDATE_STRATEGY: t.ClassVar[t.Dict[str, UpdateStrategy]] = {
sqlmesh/dbt/context.py
CHANGED

@@ -37,6 +37,8 @@ class DbtContext:
    """Context for DBT environment"""

    project_root: Path = Path()
+    profiles_dir: t.Optional[Path] = None
+    """Optional override to specify the directory where profiles.yml is located, if not at the :project_root"""
    target_name: t.Optional[str] = None
    profile_name: t.Optional[str] = None
    project_schema: t.Optional[str] = None
sqlmesh/dbt/loader.py
CHANGED

@@ -53,10 +53,18 @@ def sqlmesh_config(
    threads: t.Optional[int] = None,
    register_comments: t.Optional[bool] = None,
    infer_state_schema_name: bool = False,
+    profiles_dir: t.Optional[Path] = None,
    **kwargs: t.Any,
 ) -> Config:
    project_root = project_root or Path()
-    context = DbtContext(
+    context = DbtContext(
+        project_root=project_root, profiles_dir=profiles_dir, profile_name=dbt_profile_name
+    )
+
+    # note: Profile.load() is called twice with different DbtContext's:
+    # - once here with the above DbtContext (to determine connnection / gateway config which has to be set up before everything else)
+    # - again on the SQLMesh side via GenericContext.load() -> DbtLoader._load_projects() -> Project.load() which constructs a fresh DbtContext and ignores the above one
+    # it's important to ensure that the DbtContext created within the DbtLoader uses the same project root / profiles dir that we use here
    profile = Profile.load(context, target_name=dbt_target_name)
    model_defaults = kwargs.pop("model_defaults", ModelDefaultsConfig())
    if model_defaults.dialect is None:
@@ -98,6 +106,7 @@ def sqlmesh_config(

    return Config(
        loader=loader,
+        loader_kwargs=dict(profiles_dir=profiles_dir),
        model_defaults=model_defaults,
        variables=variables or {},
        dbt=RootDbtConfig(infer_state_schema_name=infer_state_schema_name),
@@ -116,9 +125,12 @@ def sqlmesh_config(


 class DbtLoader(Loader):
-    def __init__(
+    def __init__(
+        self, context: GenericContext, path: Path, profiles_dir: t.Optional[Path] = None
+    ) -> None:
        self._projects: t.List[Project] = []
        self._macros_max_mtime: t.Optional[float] = None
+        self._profiles_dir = profiles_dir
        super().__init__(context, path)

    def load(self) -> LoadedProject:
@@ -225,6 +237,7 @@ class DbtLoader(Loader):
        project = Project.load(
            DbtContext(
                project_root=self.config_path,
+                profiles_dir=self._profiles_dir,
                target_name=target_name,
                sqlmesh_config=self.config,
            ),
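
Note: for projects that keep profiles.yml outside the project root, the new keyword can be passed straight through sqlmesh_config. A sketch of a config.py using it; the paths are placeholders and any other arguments your project needs would be added alongside:

from pathlib import Path

from sqlmesh.dbt.loader import sqlmesh_config

config = sqlmesh_config(
    project_root=Path(__file__).parent,
    profiles_dir=Path.home() / ".dbt",  # where profiles.yml actually lives
)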
sqlmesh/dbt/manifest.py
CHANGED

@@ -11,7 +11,7 @@ from collections import defaultdict
 from functools import cached_property
 from pathlib import Path

-from dbt import
+from dbt import flags

 from sqlmesh.dbt.util import DBT_VERSION
 from sqlmesh.utils.conversions import make_serializable
@@ -19,6 +19,8 @@ from sqlmesh.utils.conversions import make_serializable
 # Override the file name to prevent dbt commands from invalidating the cache.

 if DBT_VERSION >= (1, 6, 0):
+    from dbt import constants as dbt_constants
+
    dbt_constants.PARTIAL_PARSE_FILE_NAME = "sqlmesh_partial_parse.msgpack"  # type: ignore
 else:
    from dbt.parser import manifest as dbt_manifest  # type: ignore
sqlmesh/dbt/model.py
CHANGED

@@ -567,6 +567,12 @@ class ModelConfig(BaseModelConfig):
                self.name,
                "views" if isinstance(kind, ViewKind) else "ephemeral models",
            )
+        elif context.target.dialect == "snowflake":
+            logger.warning(
+                "Ignoring partition_by config for model '%s' targeting %s. The partition_by config is not supported for Snowflake.",
+                self.name,
+                context.target.dialect,
+            )
        else:
            partitioned_by = []
            if isinstance(self.partition_by, list):
@@ -601,7 +607,13 @@ class ModelConfig(BaseModelConfig):
            clustered_by = []
            for c in self.cluster_by:
                try:
-
+                    cluster_expr = exp.maybe_parse(
+                        c, into=exp.Cluster, prefix="CLUSTER BY", dialect=model_dialect
+                    )
+                    for expr in cluster_expr.expressions:
+                        clustered_by.append(
+                            expr.this if isinstance(expr, exp.Ordered) else expr
+                        )
                except SqlglotError as e:
                    raise ConfigError(
                        f"Failed to parse model '{self.canonical_name(context)}' cluster_by field '{c}' in '{self.path}': {e}"
sqlmesh/dbt/profile.py
CHANGED

@@ -60,7 +60,7 @@ class Profile:
        if not context.profile_name:
            raise ConfigError(f"{project_file.stem} must include project name.")

-        profile_filepath = cls._find_profile(context.project_root)
+        profile_filepath = cls._find_profile(context.project_root, context.profiles_dir)
        if not profile_filepath:
            raise ConfigError(f"{cls.PROFILE_FILE} not found.")

@@ -68,8 +68,8 @@ class Profile:
        return Profile(profile_filepath, target_name, target)

    @classmethod
-    def _find_profile(cls, project_root: Path) -> t.Optional[Path]:
-        dir = os.environ.get("DBT_PROFILES_DIR", "")
+    def _find_profile(cls, project_root: Path, profiles_dir: t.Optional[Path]) -> t.Optional[Path]:
+        dir = os.environ.get("DBT_PROFILES_DIR", profiles_dir or "")
        path = Path(project_root, dir, cls.PROFILE_FILE)
        if path.exists():
            return path
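
Note: the precedence that _find_profile now implements is worth spelling out: the DBT_PROFILES_DIR environment variable still wins, the explicit profiles_dir override is the fallback, and an empty string (i.e. the project root itself) is the default. The resolution reduced to a standalone sketch with a hypothetical helper name:

import os
import typing as t
from pathlib import Path


def resolve_profiles_file(project_root: Path, profiles_dir: t.Optional[Path]) -> Path:
    # Environment variable first, then the explicit override, then the project root.
    dir_ = os.environ.get("DBT_PROFILES_DIR", profiles_dir or "")
    return Path(project_root, dir_, "profiles.yml")


print(resolve_profiles_file(Path("my_dbt_project"), Path.home() / ".dbt"))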