sqlmesh 0.225.1.dev26__py3-none-any.whl → 0.228.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of sqlmesh might be problematic.

Files changed (34)
  1. sqlmesh/_version.py +2 -2
  2. sqlmesh/core/config/connection.py +37 -1
  3. sqlmesh/core/context.py +60 -10
  4. sqlmesh/core/dialect.py +10 -2
  5. sqlmesh/core/engine_adapter/base.py +8 -1
  6. sqlmesh/core/engine_adapter/databricks.py +33 -16
  7. sqlmesh/core/engine_adapter/fabric.py +110 -2
  8. sqlmesh/core/engine_adapter/trino.py +44 -6
  9. sqlmesh/core/lineage.py +1 -0
  10. sqlmesh/core/linter/rules/builtin.py +15 -0
  11. sqlmesh/core/loader.py +17 -30
  12. sqlmesh/core/model/definition.py +9 -0
  13. sqlmesh/core/plan/definition.py +9 -7
  14. sqlmesh/core/renderer.py +7 -8
  15. sqlmesh/core/scheduler.py +45 -15
  16. sqlmesh/core/signal.py +35 -14
  17. sqlmesh/core/snapshot/definition.py +18 -12
  18. sqlmesh/core/snapshot/evaluator.py +24 -16
  19. sqlmesh/core/test/definition.py +5 -5
  20. sqlmesh/core/test/discovery.py +4 -0
  21. sqlmesh/dbt/common.py +4 -2
  22. sqlmesh/dbt/manifest.py +3 -1
  23. sqlmesh/integrations/github/cicd/command.py +11 -2
  24. sqlmesh/integrations/github/cicd/controller.py +6 -2
  25. sqlmesh/lsp/context.py +4 -2
  26. sqlmesh/magics.py +1 -1
  27. sqlmesh/utils/date.py +1 -1
  28. sqlmesh/utils/git.py +3 -1
  29. {sqlmesh-0.225.1.dev26.dist-info → sqlmesh-0.228.2.dist-info}/METADATA +3 -3
  30. {sqlmesh-0.225.1.dev26.dist-info → sqlmesh-0.228.2.dist-info}/RECORD +34 -34
  31. {sqlmesh-0.225.1.dev26.dist-info → sqlmesh-0.228.2.dist-info}/WHEEL +0 -0
  32. {sqlmesh-0.225.1.dev26.dist-info → sqlmesh-0.228.2.dist-info}/entry_points.txt +0 -0
  33. {sqlmesh-0.225.1.dev26.dist-info → sqlmesh-0.228.2.dist-info}/licenses/LICENSE +0 -0
  34. {sqlmesh-0.225.1.dev26.dist-info → sqlmesh-0.228.2.dist-info}/top_level.txt +0 -0
sqlmesh/core/loader.py CHANGED
@@ -35,7 +35,7 @@ from sqlmesh.core.model import (
 from sqlmesh.core.model import model as model_registry
 from sqlmesh.core.model.common import make_python_env
 from sqlmesh.core.signal import signal
-from sqlmesh.core.test import ModelTestMetadata, filter_tests_by_patterns
+from sqlmesh.core.test import ModelTestMetadata
 from sqlmesh.utils import UniqueKeyDict, sys_path
 from sqlmesh.utils.errors import ConfigError
 from sqlmesh.utils.jinja import JinjaMacroRegistry, MacroExtractor
@@ -64,6 +64,7 @@ class LoadedProject:
     excluded_requirements: t.Set[str]
     environment_statements: t.List[EnvironmentStatements]
     user_rules: RuleSet
+    model_test_metadata: t.List[ModelTestMetadata]


 class CacheBase(abc.ABC):
@@ -243,6 +244,8 @@ class Loader(abc.ABC):

         user_rules = self._load_linting_rules()

+        model_test_metadata = self.load_model_tests()
+
         project = LoadedProject(
             macros=macros,
             jinja_macros=jinja_macros,
@@ -254,6 +257,7 @@ class Loader(abc.ABC):
             excluded_requirements=excluded_requirements,
             environment_statements=environment_statements,
             user_rules=user_rules,
+            model_test_metadata=model_test_metadata,
         )
         return project

@@ -423,9 +427,7 @@ class Loader(abc.ABC):
         """Loads user linting rules"""
         return RuleSet()

-    def load_model_tests(
-        self, tests: t.Optional[t.List[str]] = None, patterns: list[str] | None = None
-    ) -> t.List[ModelTestMetadata]:
+    def load_model_tests(self) -> t.List[ModelTestMetadata]:
         """Loads YAML-based model tests"""
         return []

@@ -864,38 +866,23 @@ class SqlMeshLoader(Loader):

         return model_test_metadata

-    def load_model_tests(
-        self, tests: t.Optional[t.List[str]] = None, patterns: list[str] | None = None
-    ) -> t.List[ModelTestMetadata]:
+    def load_model_tests(self) -> t.List[ModelTestMetadata]:
         """Loads YAML-based model tests"""
         test_meta_list: t.List[ModelTestMetadata] = []

-        if tests:
-            for test in tests:
-                filename, test_name = test.split("::", maxsplit=1) if "::" in test else (test, "")
-
-                test_meta = self._load_model_test_file(Path(filename))
-                if test_name:
-                    test_meta_list.append(test_meta[test_name])
-                else:
-                    test_meta_list.extend(test_meta.values())
-        else:
-            search_path = Path(self.config_path) / c.TESTS
+        search_path = Path(self.config_path) / c.TESTS

-            for yaml_file in itertools.chain(
-                search_path.glob("**/test*.yaml"),
-                search_path.glob("**/test*.yml"),
+        for yaml_file in itertools.chain(
+            search_path.glob("**/test*.yaml"),
+            search_path.glob("**/test*.yml"),
+        ):
+            if any(
+                yaml_file.match(ignore_pattern)
+                for ignore_pattern in self.config.ignore_patterns or []
             ):
-                if any(
-                    yaml_file.match(ignore_pattern)
-                    for ignore_pattern in self.config.ignore_patterns or []
-                ):
-                    continue
-
-                test_meta_list.extend(self._load_model_test_file(yaml_file).values())
+                continue

-        if patterns:
-            test_meta_list = filter_tests_by_patterns(test_meta_list, patterns)
+            test_meta_list.extend(self._load_model_test_file(yaml_file).values())

         return test_meta_list

sqlmesh/core/model/definition.py CHANGED
@@ -34,6 +34,7 @@ from sqlmesh.core.model.common import (
 )
 from sqlmesh.core.model.meta import ModelMeta
 from sqlmesh.core.model.kind import (
+    ExternalKind,
     ModelKindName,
     SeedKind,
     ModelKind,
@@ -1035,6 +1036,13 @@ class _Model(ModelMeta, frozen=True):
             # Will raise if the custom materialization points to an invalid class
             get_custom_materialization_type_or_raise(self.kind.materialization)

+        # Embedded model kind shouldn't have audits
+        if self.kind.name == ModelKindName.EMBEDDED and self.audits:
+            raise_config_error(
+                "Audits are not supported for embedded models",
+                self._path,
+            )
+
     def is_breaking_change(self, previous: Model) -> t.Optional[bool]:
         """Determines whether this model is a breaking change in relation to the `previous` model.

@@ -1962,6 +1970,7 @@ class PythonModel(_Model):
 class ExternalModel(_Model):
     """The model definition which represents an external source/table."""

+    kind: ModelKind = ExternalKind()
     source_type: t.Literal["external"] = "external"

     def is_breaking_change(self, previous: Model) -> t.Optional[bool]:
sqlmesh/core/plan/definition.py CHANGED
@@ -63,7 +63,7 @@ class Plan(PydanticModel, frozen=True):
     restatements: t.Dict[SnapshotId, Interval]
     """
     All models being restated, which are typically the explicitly selected ones + their downstream dependencies.
-
+
     Note that dev previews are also considered restatements, so :selected_models_to_restate can be empty
     while :restatements is still populated with dev previews
     """
@@ -213,8 +213,8 @@

         snapshots_by_name = self.context_diff.snapshots_by_name
         snapshots = [s.table_info for s in self.snapshots.values()]
-        promoted_snapshot_ids = None
-        if self.is_dev and not self.include_unmodified:
+        promotable_snapshot_ids = None
+        if self.is_dev:
             if self.selected_models_to_backfill is not None:
                 # Only promote models that have been explicitly selected for backfill.
                 promotable_snapshot_ids = {
@@ -225,12 +225,14 @@
                     if m in snapshots_by_name
                 ],
             }
-        else:
+        elif not self.include_unmodified:
             promotable_snapshot_ids = self.context_diff.promotable_snapshot_ids.copy()

-        promoted_snapshot_ids = [
-            s.snapshot_id for s in snapshots if s.snapshot_id in promotable_snapshot_ids
-        ]
+        promoted_snapshot_ids = (
+            [s.snapshot_id for s in snapshots if s.snapshot_id in promotable_snapshot_ids]
+            if promotable_snapshot_ids is not None
+            else None
+        )

         previous_finalized_snapshots = (
             self.context_diff.environment_snapshots
sqlmesh/core/renderer.py CHANGED
@@ -196,7 +196,14 @@ class BaseExpressionRenderer:
             **kwargs,
         }

+        if this_model:
+            render_kwargs["this_model"] = this_model
+
+        macro_evaluator.locals.update(render_kwargs)
+
         variables = kwargs.pop("variables", {})
+        if variables:
+            macro_evaluator.locals.setdefault(c.SQLMESH_VARS, {}).update(variables)

         expressions = [self._expression]
         if isinstance(self._expression, d.Jinja):
@@ -268,14 +275,6 @@ class BaseExpressionRenderer:
                     f"Could not parse the rendered jinja at '{self._path}'.\n{ex}"
                 ) from ex

-        if this_model:
-            render_kwargs["this_model"] = this_model
-
-        macro_evaluator.locals.update(render_kwargs)
-
-        if variables:
-            macro_evaluator.locals.setdefault(c.SQLMESH_VARS, {}).update(variables)
-
         for definition in self._macro_definitions:
             try:
                 macro_evaluator.evaluate(definition)
sqlmesh/core/scheduler.py CHANGED
@@ -352,7 +352,7 @@ class Scheduler:
             )
             for snapshot, intervals in merged_intervals.items()
         }
-        snapshot_batches = {}
+        snapshot_batches: t.Dict[Snapshot, Intervals] = {}
        all_unready_intervals: t.Dict[str, set[Interval]] = {}
        for snapshot_id in dag:
            if snapshot_id not in snapshot_intervals:
@@ -364,6 +364,14 @@

            adapter = self.snapshot_evaluator.get_adapter(snapshot.model_gateway)

+            parent_intervals: Intervals = []
+            for parent_id in snapshot.parents:
+                parent_snapshot, _ = snapshot_intervals.get(parent_id, (None, []))
+                if not parent_snapshot or parent_snapshot.is_external:
+                    continue
+
+                parent_intervals.extend(snapshot_batches[parent_snapshot])
+
            context = ExecutionContext(
                adapter,
                self.snapshots_by_name,
@@ -371,6 +379,7 @@
                default_dialect=adapter.dialect,
                default_catalog=self.default_catalog,
                is_restatement=is_restatement,
+                parent_intervals=parent_intervals,
            )

            intervals = self._check_ready_intervals(
@@ -538,6 +547,10 @@
                execution_time=execution_time,
            )
        else:
+            # If batch_index > 0, then the target table must exist since the first batch would have created it
+            target_table_exists = (
+                snapshot.snapshot_id not in snapshots_to_create or node.batch_index > 0
+            )
            audit_results = self.evaluate(
                snapshot=snapshot,
                environment_naming_info=environment_naming_info,
@@ -548,7 +561,7 @@
                batch_index=node.batch_index,
                allow_destructive_snapshots=allow_destructive_snapshots,
                allow_additive_snapshots=allow_additive_snapshots,
-                target_table_exists=snapshot.snapshot_id not in snapshots_to_create,
+                target_table_exists=target_table_exists,
                selected_models=selected_models,
            )

@@ -646,6 +659,7 @@
        }
        snapshots_to_create = snapshots_to_create or set()
        original_snapshots_to_create = snapshots_to_create.copy()
+        upstream_dependencies_cache: t.Dict[SnapshotId, t.Set[SchedulingUnit]] = {}

        snapshot_dag = snapshot_dag or snapshots_to_dag(batches)
        dag = DAG[SchedulingUnit]()
@@ -657,12 +671,15 @@
            snapshot = self.snapshots_by_name[snapshot_id.name]
            intervals = intervals_per_snapshot.get(snapshot.name, [])

-            upstream_dependencies: t.List[SchedulingUnit] = []
+            upstream_dependencies: t.Set[SchedulingUnit] = set()

            for p_sid in snapshot.parents:
-                upstream_dependencies.extend(
+                upstream_dependencies.update(
                    self._find_upstream_dependencies(
-                        p_sid, intervals_per_snapshot, original_snapshots_to_create
+                        p_sid,
+                        intervals_per_snapshot,
+                        original_snapshots_to_create,
+                        upstream_dependencies_cache,
                    )
                )

@@ -713,29 +730,42 @@
        parent_sid: SnapshotId,
        intervals_per_snapshot: t.Dict[str, Intervals],
        snapshots_to_create: t.Set[SnapshotId],
-    ) -> t.List[SchedulingUnit]:
+        cache: t.Dict[SnapshotId, t.Set[SchedulingUnit]],
+    ) -> t.Set[SchedulingUnit]:
        if parent_sid not in self.snapshots:
-            return []
+            return set()
+        if parent_sid in cache:
+            return cache[parent_sid]

        p_intervals = intervals_per_snapshot.get(parent_sid.name, [])

+        parent_node: t.Optional[SchedulingUnit] = None
        if p_intervals:
            if len(p_intervals) > 1:
-                return [DummyNode(snapshot_name=parent_sid.name)]
-            interval = p_intervals[0]
-            return [EvaluateNode(snapshot_name=parent_sid.name, interval=interval, batch_index=0)]
-        if parent_sid in snapshots_to_create:
-            return [CreateNode(snapshot_name=parent_sid.name)]
+                parent_node = DummyNode(snapshot_name=parent_sid.name)
+            else:
+                interval = p_intervals[0]
+                parent_node = EvaluateNode(
+                    snapshot_name=parent_sid.name, interval=interval, batch_index=0
+                )
+        elif parent_sid in snapshots_to_create:
+            parent_node = CreateNode(snapshot_name=parent_sid.name)
+
+        if parent_node is not None:
+            cache[parent_sid] = {parent_node}
+            return {parent_node}
+
        # This snapshot has no intervals and doesn't need creation which means
        # that it can be a transitive dependency
-        transitive_deps: t.List[SchedulingUnit] = []
+        transitive_deps: t.Set[SchedulingUnit] = set()
        parent_snapshot = self.snapshots[parent_sid]
        for grandparent_sid in parent_snapshot.parents:
-            transitive_deps.extend(
+            transitive_deps.update(
                self._find_upstream_dependencies(
-                    grandparent_sid, intervals_per_snapshot, snapshots_to_create
+                    grandparent_sid, intervals_per_snapshot, snapshots_to_create, cache
                )
            )
+        cache[parent_sid] = transitive_deps
        return transitive_deps

    def _run_or_audit(
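The `_find_upstream_dependencies` changes above thread a `cache` dictionary through the recursion so that shared ancestors in the snapshot DAG are traversed only once. A stripped-down sketch of that memoization pattern, using generic names rather than the scheduler's actual `SchedulingUnit` types:

    import typing as t


    def find_deps(
        node: str,
        parents: t.Dict[str, t.List[str]],
        cache: t.Dict[str, t.Set[str]],
    ) -> t.Set[str]:
        # Reuse the cached result if this node was already visited.
        if node in cache:
            return cache[node]

        deps: t.Set[str] = set()
        for parent in parents.get(node, []):
            deps.add(parent)
            deps.update(find_deps(parent, parents, cache))

        cache[node] = deps
        return deps


    parents = {"c": ["a", "b"], "b": ["a"], "a": []}
    print(find_deps("c", parents, {}))  # {'a', 'b'}
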
sqlmesh/core/signal.py CHANGED
@@ -2,6 +2,7 @@ from __future__ import annotations

 import typing as t
 from sqlmesh.utils import UniqueKeyDict, registry_decorator
+from sqlmesh.utils.errors import MissingSourceError

 if t.TYPE_CHECKING:
     from sqlmesh.core.context import ExecutionContext
@@ -42,7 +43,16 @@ SignalRegistry = UniqueKeyDict[str, signal]


 @signal()
-def freshness(batch: DatetimeRanges, snapshot: Snapshot, context: ExecutionContext) -> bool:
+def freshness(
+    batch: DatetimeRanges,
+    snapshot: Snapshot,
+    context: ExecutionContext,
+) -> bool:
+    """
+    Implements model freshness as a signal, i.e it considers this model to be fresh if:
+    - Any upstream SQLMesh model has available intervals to compute i.e is fresh
+    - Any upstream external model has been altered since the last time the model was evaluated
+    """
     adapter = context.engine_adapter
     if context.is_restatement or not adapter.SUPPORTS_METADATA_TABLE_LAST_MODIFIED_TS:
         return True
@@ -54,24 +64,35 @@ def freshness(batch: DatetimeRanges, snapshot: Snapshot, context: ExecutionContext) -> bool:
         if deployability_index.is_deployable(snapshot)
         else snapshot.dev_last_altered_ts
     )
+
     if not last_altered_ts:
         return True

     parent_snapshots = {context.snapshots[p.name] for p in snapshot.parents}
-    if len(parent_snapshots) != len(snapshot.node.depends_on) or not all(
-        p.is_external for p in parent_snapshots
-    ):
-        # The mismatch can happen if e.g an external model is not registered in the project
+
+    upstream_parent_snapshots = {p for p in parent_snapshots if not p.is_external}
+    external_parents = snapshot.node.depends_on - {p.name for p in upstream_parent_snapshots}
+
+    if context.parent_intervals:
+        # At least one upstream sqlmesh model has intervals to compute (i.e is fresh),
+        # so the current model is considered fresh too
         return True

-    # Finding new data means that the upstream depedencies have been altered
-    # since the last time the model was evaluated
-    upstream_dep_has_new_data = any(
-        upstream_last_altered_ts > last_altered_ts
-        for upstream_last_altered_ts in adapter.get_table_last_modified_ts(
-            [p.name for p in parent_snapshots]
+    if external_parents:
+        external_last_altered_timestamps = adapter.get_table_last_modified_ts(
+            list(external_parents)
+        )
+
+        if len(external_last_altered_timestamps) != len(external_parents):
+            raise MissingSourceError(
+                f"Expected {len(external_parents)} sources to be present, but got {len(external_last_altered_timestamps)}."
+            )
+
+        # Finding new data means that the upstream depedencies have been altered
+        # since the last time the model was evaluated
+        return any(
+            external_last_altered_ts > last_altered_ts
+            for external_last_altered_ts in external_last_altered_timestamps
        )
-    )

-    # Returning true is a no-op, returning False nullifies the batch so the model will not be evaluated.
-    return upstream_dep_has_new_data
+    return False
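Beyond the built-in `freshness` signal shown above, the same `@signal()` decorator registers user-defined signals (the loader diff earlier imports it via `from sqlmesh.core.signal import signal`). A minimal sketch with an illustrative name and a trivially permissive body:

    from sqlmesh.core.signal import signal


    @signal()
    def always_ready(batch):
        # batch holds the candidate datetime ranges for the evaluation.
        # Returning True keeps the batch; returning False nullifies it so the
        # model is not evaluated (the behavior described by the comment removed above).
        return True
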
sqlmesh/core/snapshot/definition.py CHANGED
@@ -2081,16 +2081,20 @@ def missing_intervals(
                 continue
             snapshot_end_date = existing_interval_end

+        snapshot_start_date = max(
+            to_datetime(snapshot_start_date),
+            to_datetime(start_date(snapshot, snapshots, cache, relative_to=snapshot_end_date)),
+        )
+        if snapshot_start_date > to_datetime(snapshot_end_date):
+            continue
+
         missing_interval_end_date = snapshot_end_date
         node_end_date = snapshot.node.end
         if node_end_date and (to_datetime(node_end_date) < to_datetime(snapshot_end_date)):
             missing_interval_end_date = node_end_date

         intervals = snapshot.missing_intervals(
-            max(
-                to_datetime(snapshot_start_date),
-                to_datetime(start_date(snapshot, snapshots, cache, relative_to=snapshot_end_date)),
-            ),
+            snapshot_start_date,
             missing_interval_end_date,
             execution_time=execution_time,
             deployability_index=deployability_index,
@@ -2295,14 +2299,16 @@ def start_date(
     if not isinstance(snapshots, dict):
         snapshots = {snapshot.snapshot_id: snapshot for snapshot in snapshots}

-    earliest = snapshot.node.cron_prev(snapshot.node.cron_floor(relative_to or now()))
-
-    for parent in snapshot.parents:
-        if parent in snapshots:
-            earliest = min(
-                earliest,
-                start_date(snapshots[parent], snapshots, cache=cache, relative_to=relative_to),
-            )
+    parent_starts = [
+        start_date(snapshots[parent], snapshots, cache=cache, relative_to=relative_to)
+        for parent in snapshot.parents
+        if parent in snapshots
+    ]
+    earliest = (
+        min(parent_starts)
+        if parent_starts
+        else snapshot.node.cron_prev(snapshot.node.cron_floor(relative_to or now()))
+    )

     cache[key] = earliest
     return earliest
sqlmesh/core/snapshot/evaluator.py CHANGED
@@ -1021,6 +1021,11 @@ class SnapshotEvaluator:
         ):
             import pandas as pd

+            try:
+                first_query_or_df = next(queries_or_dfs)
+            except StopIteration:
+                return
+
             query_or_df = reduce(
                 lambda a, b: (
                     pd.concat([a, b], ignore_index=True)  # type: ignore
@@ -1028,6 +1033,7 @@
                     else a.union_all(b)  # type: ignore
                 ),  # type: ignore
                 queries_or_dfs,
+                first_query_or_df,
             )
             apply(query_or_df, index=0)
         else:
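The `next()`/`StopIteration` guard added in this hunk exists because `functools.reduce` raises a `TypeError` when it is handed an empty iterator and no initial value; pulling the first item out and passing it as the seed both avoids the error and allows an early return. A standalone illustration of the underlying behavior (the names are illustrative):

    from functools import reduce

    empty_gen = iter([])  # stands in for a generator that yields no queries or DataFrames

    try:
        reduce(lambda a, b: a + b, empty_gen)
    except TypeError as ex:
        print(ex)  # reduce() of empty iterable with no initial value

    # With an explicit seed (the role played by first_query_or_df above), reduce is safe:
    print(reduce(lambda a, b: a + b, iter([]), 0))  # 0
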
@@ -1593,14 +1599,14 @@
         tables_by_gateway_and_schema: t.Dict[t.Union[str, None], t.Dict[exp.Table, set[str]]] = (
             defaultdict(lambda: defaultdict(set))
         )
-        snapshots_by_table_name: t.Dict[str, Snapshot] = {}
+        snapshots_by_table_name: t.Dict[exp.Table, t.Dict[str, Snapshot]] = defaultdict(dict)
         for snapshot in target_snapshots:
             if not snapshot.is_model or snapshot.is_symbolic:
                 continue
             table = table_name_callable(snapshot)
             table_schema = d.schema_(table.db, catalog=table.catalog)
             tables_by_gateway_and_schema[snapshot.model_gateway][table_schema].add(table.name)
-            snapshots_by_table_name[table.name] = snapshot
+            snapshots_by_table_name[table_schema][table.name] = snapshot

         def _get_data_objects_in_schema(
             schema: exp.Table,
@@ -1613,23 +1619,25 @@
             )

         with self.concurrent_context():
-            existing_objects: t.List[DataObject] = []
+            snapshot_id_to_obj: t.Dict[SnapshotId, DataObject] = {}
             # A schema can be shared across multiple engines, so we need to group tables by both gateway and schema
             for gateway, tables_by_schema in tables_by_gateway_and_schema.items():
-                objs_for_gateway = [
-                    obj
-                    for objs in concurrent_apply_to_values(
-                        list(tables_by_schema),
-                        lambda s: _get_data_objects_in_schema(
-                            schema=s, object_names=tables_by_schema.get(s), gateway=gateway
-                        ),
-                        self.ddl_concurrent_tasks,
-                    )
-                    for obj in objs
-                ]
-                existing_objects.extend(objs_for_gateway)
+                schema_list = list(tables_by_schema.keys())
+                results = concurrent_apply_to_values(
+                    schema_list,
+                    lambda s: _get_data_objects_in_schema(
+                        schema=s, object_names=tables_by_schema.get(s), gateway=gateway
+                    ),
+                    self.ddl_concurrent_tasks,
+                )
+
+                for schema, objs in zip(schema_list, results):
+                    snapshots_by_name = snapshots_by_table_name.get(schema, {})
+                    for obj in objs:
+                        if obj.name in snapshots_by_name:
+                            snapshot_id_to_obj[snapshots_by_name[obj.name].snapshot_id] = obj

-        return {snapshots_by_table_name[obj.name].snapshot_id: obj for obj in existing_objects}
+        return snapshot_id_to_obj


 def _evaluation_strategy(snapshot: SnapshotInfoLike, adapter: EngineAdapter) -> EvaluationStrategy:
sqlmesh/core/test/definition.py CHANGED
@@ -355,11 +355,12 @@ class ModelTest(unittest.TestCase):
                 for df in _split_df_by_column_pairs(diff)
             )
         else:
-            from pandas import MultiIndex
+            from pandas import DataFrame, MultiIndex

             levels = t.cast(MultiIndex, diff.columns).levels[0]
             for col in levels:
-                col_diff = diff[col]
+                # diff[col] returns a DataFrame when columns is a MultiIndex
+                col_diff = t.cast(DataFrame, diff[col])
                 if not col_diff.empty:
                     table = df_to_table(
                         f"[bold red]Column '{col}' mismatch{failed_subtest}[/bold red]",
@@ -807,7 +808,7 @@ class PythonModelTest(ModelTest):
         actual_df.reset_index(drop=True, inplace=True)
         expected = self._create_df(values, columns=self.model.columns_to_types, partial=partial)

-        self.assert_equal(expected, actual_df, sort=False, partial=partial)
+        self.assert_equal(expected, actual_df, sort=True, partial=partial)

     def _execute_model(self) -> pd.DataFrame:
         """Executes the python model and returns a DataFrame."""
@@ -925,8 +926,7 @@ def generate_test(
             cte_output = test._execute(cte_query)
             ctes[cte.alias] = (
                 pandas_timestamp_to_pydatetime(
-                    cte_output.apply(lambda col: col.map(_normalize_df_value)),
-                    cte_query.named_selects,
+                    df=cte_output.apply(lambda col: col.map(_normalize_df_value)),
                 )
                 .replace({np.nan: None})
                 .to_dict(orient="records")
sqlmesh/core/test/discovery.py CHANGED
@@ -20,6 +20,10 @@ class ModelTestMetadata(PydanticModel):
     def fully_qualified_test_name(self) -> str:
         return f"{self.path}::{self.test_name}"

+    @property
+    def model_name(self) -> str:
+        return self.body.get("model", "")
+
     def __hash__(self) -> int:
         return self.fully_qualified_test_name.__hash__()

sqlmesh/dbt/common.py CHANGED
@@ -46,7 +46,9 @@ def load_yaml(source: str | Path) -> t.Dict:
         raise ConfigError(f"{source}: {ex}" if isinstance(source, Path) else f"{ex}")


-def parse_meta(v: t.Dict[str, t.Any]) -> t.Dict[str, t.Any]:
+def parse_meta(v: t.Optional[t.Dict[str, t.Any]]) -> t.Dict[str, t.Any]:
+    if v is None:
+        return {}
     for key, value in v.items():
         if isinstance(value, str):
             v[key] = try_str_to_bool(value)
@@ -115,7 +117,7 @@ class GeneralConfig(DbtConfig):

     @field_validator("meta", mode="before")
     @classmethod
-    def _validate_meta(cls, v: t.Dict[str, t.Union[str, t.Any]]) -> t.Dict[str, t.Any]:
+    def _validate_meta(cls, v: t.Optional[t.Dict[str, t.Union[str, t.Any]]]) -> t.Dict[str, t.Any]:
         return parse_meta(v)

     _FIELD_UPDATE_STRATEGY: t.ClassVar[t.Dict[str, UpdateStrategy]] = {
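The `None` guard added to `parse_meta` matters because a dbt properties file can declare a `meta:` key with no value, which YAML loads as `None` rather than an empty mapping. A standalone illustration, assuming PyYAML as the loader (the sample document is illustrative):

    import yaml

    doc = yaml.safe_load("meta:\n")  # a properties entry whose meta key is left empty
    print(doc)  # {'meta': None}

    # The previous signature assumed a dict, so calling .items() on the None value
    # would raise AttributeError; the updated parse_meta now returns {} instead.
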
sqlmesh/dbt/manifest.py CHANGED
@@ -11,7 +11,7 @@ from collections import defaultdict
 from functools import cached_property
 from pathlib import Path

-from dbt import constants as dbt_constants, flags
+from dbt import flags

 from sqlmesh.dbt.util import DBT_VERSION
 from sqlmesh.utils.conversions import make_serializable
@@ -19,6 +19,8 @@ from sqlmesh.utils.conversions import make_serializable
 # Override the file name to prevent dbt commands from invalidating the cache.

 if DBT_VERSION >= (1, 6, 0):
+    from dbt import constants as dbt_constants
+
     dbt_constants.PARTIAL_PARSE_FILE_NAME = "sqlmesh_partial_parse.msgpack"  # type: ignore
 else:
     from dbt.parser import manifest as dbt_manifest  # type: ignore
sqlmesh/integrations/github/cicd/command.py CHANGED
@@ -25,12 +25,21 @@ logger = logging.getLogger(__name__)
     envvar="GITHUB_TOKEN",
     help="The Github Token to be used. Pass in `${{ secrets.GITHUB_TOKEN }}` if you want to use the one created by Github actions",
 )
+@click.option(
+    "--full-logs",
+    is_flag=True,
+    help="Whether to print all logs in the Github Actions output or only in their relevant GA check",
+)
 @click.pass_context
-def github(ctx: click.Context, token: str) -> None:
+def github(ctx: click.Context, token: str, full_logs: bool = False) -> None:
     """Github Action CI/CD Bot. See https://sqlmesh.readthedocs.io/en/stable/integrations/github/ for details"""
     # set a larger width because if none is specified, it auto-detects 80 characters when running in GitHub Actions
     # which can result in surprise newlines when outputting dates to backfill
-    set_console(MarkdownConsole(width=1000, warning_capture_only=True, error_capture_only=True))
+    set_console(
+        MarkdownConsole(
+            width=1000, warning_capture_only=not full_logs, error_capture_only=not full_logs
+        )
+    )
     ctx.obj["github"] = GithubController(
         paths=ctx.obj["paths"],
         token=token,
sqlmesh/integrations/github/cicd/controller.py CHANGED
@@ -448,10 +448,9 @@ class GithubController:
             c.PROD,
             # this is required to highlight any data gaps between this PR environment and prod (since PR environments may only contain a subset of data)
             no_gaps=False,
-            # this works because the snapshots were already categorized when applying self.pr_plan so there are no uncategorized local snapshots to trigger a plan error
-            no_auto_categorization=True,
             skip_tests=True,
             skip_linter=True,
+            categorizer_config=self.bot_config.auto_categorize_changes,
             run=self.bot_config.run_on_deploy_to_prod,
             forward_only=self.forward_only_plan,
         )
@@ -773,6 +772,11 @@
                 "PR is already merged and this event was triggered prior to the merge."
             )
         merge_status = self._get_merge_state_status()
+        if merge_status.is_blocked:
+            raise CICDBotError(
+                "Branch protection or ruleset requirement is likely not satisfied, e.g. missing CODEOWNERS approval. "
+                "Please check PR and resolve any issues."
+            )
         if merge_status.is_dirty:
             raise CICDBotError(
                 "Merge commit cannot be cleanly created. Likely from a merge conflict. "