squirrels-0.5.0b3-py3-none-any.whl → squirrels-0.6.0.post0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- squirrels/__init__.py +4 -0
- squirrels/_api_routes/__init__.py +5 -0
- squirrels/_api_routes/auth.py +337 -0
- squirrels/_api_routes/base.py +196 -0
- squirrels/_api_routes/dashboards.py +156 -0
- squirrels/_api_routes/data_management.py +148 -0
- squirrels/_api_routes/datasets.py +220 -0
- squirrels/_api_routes/project.py +289 -0
- squirrels/_api_server.py +440 -792
- squirrels/_arguments/__init__.py +0 -0
- squirrels/_arguments/{_init_time_args.py → init_time_args.py} +23 -43
- squirrels/_arguments/{_run_time_args.py → run_time_args.py} +32 -68
- squirrels/_auth.py +590 -264
- squirrels/_command_line.py +130 -58
- squirrels/_compile_prompts.py +147 -0
- squirrels/_connection_set.py +16 -15
- squirrels/_constants.py +36 -11
- squirrels/_dashboards.py +179 -0
- squirrels/_data_sources.py +40 -34
- squirrels/_dataset_types.py +16 -11
- squirrels/_env_vars.py +209 -0
- squirrels/_exceptions.py +9 -37
- squirrels/_http_error_responses.py +52 -0
- squirrels/_initializer.py +7 -6
- squirrels/_logging.py +121 -0
- squirrels/_manifest.py +155 -77
- squirrels/_mcp_server.py +578 -0
- squirrels/_model_builder.py +11 -55
- squirrels/_model_configs.py +5 -5
- squirrels/_model_queries.py +1 -1
- squirrels/_models.py +276 -143
- squirrels/_package_data/base_project/.env +1 -24
- squirrels/_package_data/base_project/.env.example +31 -17
- squirrels/_package_data/base_project/connections.yml +4 -3
- squirrels/_package_data/base_project/dashboards/dashboard_example.py +13 -7
- squirrels/_package_data/base_project/dashboards/dashboard_example.yml +6 -6
- squirrels/_package_data/base_project/docker/Dockerfile +2 -2
- squirrels/_package_data/base_project/docker/compose.yml +1 -1
- squirrels/_package_data/base_project/duckdb_init.sql +1 -0
- squirrels/_package_data/base_project/models/builds/build_example.py +2 -2
- squirrels/_package_data/base_project/models/dbviews/dbview_example.sql +7 -2
- squirrels/_package_data/base_project/models/dbviews/dbview_example.yml +16 -10
- squirrels/_package_data/base_project/models/federates/federate_example.py +27 -17
- squirrels/_package_data/base_project/models/federates/federate_example.sql +3 -7
- squirrels/_package_data/base_project/models/federates/federate_example.yml +7 -7
- squirrels/_package_data/base_project/models/sources.yml +5 -6
- squirrels/_package_data/base_project/parameters.yml +24 -38
- squirrels/_package_data/base_project/pyconfigs/connections.py +8 -3
- squirrels/_package_data/base_project/pyconfigs/context.py +26 -14
- squirrels/_package_data/base_project/pyconfigs/parameters.py +124 -81
- squirrels/_package_data/base_project/pyconfigs/user.py +48 -15
- squirrels/_package_data/base_project/resources/public/.gitkeep +0 -0
- squirrels/_package_data/base_project/seeds/seed_categories.yml +1 -1
- squirrels/_package_data/base_project/seeds/seed_subcategories.yml +1 -1
- squirrels/_package_data/base_project/squirrels.yml.j2 +21 -31
- squirrels/_package_data/templates/login_successful.html +53 -0
- squirrels/_package_data/templates/squirrels_studio.html +22 -0
- squirrels/_parameter_configs.py +43 -22
- squirrels/_parameter_options.py +1 -1
- squirrels/_parameter_sets.py +41 -30
- squirrels/_parameters.py +560 -123
- squirrels/_project.py +487 -277
- squirrels/_py_module.py +71 -10
- squirrels/_request_context.py +33 -0
- squirrels/_schemas/__init__.py +0 -0
- squirrels/_schemas/auth_models.py +83 -0
- squirrels/_schemas/query_param_models.py +70 -0
- squirrels/_schemas/request_models.py +26 -0
- squirrels/_schemas/response_models.py +286 -0
- squirrels/_seeds.py +52 -13
- squirrels/_sources.py +29 -23
- squirrels/_utils.py +221 -42
- squirrels/_version.py +1 -3
- squirrels/arguments.py +7 -2
- squirrels/auth.py +4 -0
- squirrels/connections.py +2 -0
- squirrels/dashboards.py +3 -1
- squirrels/data_sources.py +6 -0
- squirrels/parameter_options.py +5 -0
- squirrels/parameters.py +5 -0
- squirrels/types.py +10 -3
- squirrels-0.6.0.post0.dist-info/METADATA +148 -0
- squirrels-0.6.0.post0.dist-info/RECORD +101 -0
- {squirrels-0.5.0b3.dist-info → squirrels-0.6.0.post0.dist-info}/WHEEL +1 -1
- squirrels/_api_response_models.py +0 -190
- squirrels/_dashboard_types.py +0 -82
- squirrels/_dashboards_io.py +0 -79
- squirrels-0.5.0b3.dist-info/METADATA +0 -110
- squirrels-0.5.0b3.dist-info/RECORD +0 -80
- /squirrels/_package_data/base_project/{assets → resources}/expenses.db +0 -0
- /squirrels/_package_data/base_project/{assets → resources}/weather.db +0 -0
- {squirrels-0.5.0b3.dist-info → squirrels-0.6.0.post0.dist-info}/entry_points.txt +0 -0
- {squirrels-0.5.0b3.dist-info → squirrels-0.6.0.post0.dist-info}/licenses/LICENSE +0 -0
squirrels/_project.py
CHANGED
```diff
@@ -1,39 +1,25 @@
-from
-from
+from typing import TYPE_CHECKING
+from dotenv import dotenv_values, load_dotenv
 from pathlib import Path
 import asyncio, typing as t, functools as ft, shutil, json, os
-import
-import sqlglot, sqlglot.expressions
+import sqlglot, sqlglot.expressions, duckdb, polars as pl
 
-from ._auth import Authenticator,
+from ._auth import Authenticator, AuthProviderArgs, ProviderFunctionType
+from ._schemas.auth_models import CustomUserFields, AbstractUser, GuestUser, RegisteredUser
+from ._schemas import response_models as rm
 from ._model_builder import ModelBuilder
+from ._env_vars import SquirrelsEnvVars
 from ._exceptions import InvalidInputError, ConfigurationError
-from . import
+from ._py_module import PyModule
+from . import _dashboards as d, _utils as u, _constants as c, _manifest as mf, _connection_set as cs
 from . import _seeds as s, _models as m, _model_configs as mc, _model_queries as mq, _sources as so
-from . import _parameter_sets as ps,
-
-T = t.TypeVar("T", bound=dash.Dashboard)
-M = t.TypeVar("M", bound=m.DataModel)
+from . import _parameter_sets as ps, _dataset_types as dr, _logging as l
 
+if TYPE_CHECKING:
+    from ._api_server import FastAPIComponents
 
-
-
-        super().format(record)
-        info = {
-            "timestamp": self.formatTime(record),
-            "project_id": record.name,
-            "level": record.levelname,
-            "message": record.getMessage(),
-            "thread": record.thread,
-            "thread_name": record.threadName,
-            "process": record.process,
-            **record.__dict__.get("info", {})
-        }
-        output = {
-            "data": record.__dict__.get("data", {}),
-            "info": info
-        }
-        return json.dumps(output)
+T = t.TypeVar("T", bound=d.Dashboard)
+M = t.TypeVar("M", bound=m.DataModel)
 
 
 class SquirrelsProject:
```
```diff
@@ -41,114 +27,179 @@ class SquirrelsProject:
     Initiate an instance of this class to interact with a Squirrels project through Python code. For example this can be handy to experiment with the datasets produced by Squirrels in a Jupyter notebook.
     """
 
-    def __init__(
+    def __init__(
+        self, *, project_path: str = ".", load_dotenv_globally: bool = False,
+        log_to_file: bool = False, log_level: str | None = None, log_format: str | None = None,
+    ) -> None:
         """
         Constructor for SquirrelsProject class. Loads the file contents of the Squirrels project into memory as member fields.
 
         Arguments:
-
-            log_level: The logging level to use. Options are "DEBUG", "INFO", and "WARNING". Default is "INFO".
-
-            log_format: The format of the log records. Options are "text" and "json". Default is "text".
+            project_path: The path to the Squirrels project file. Defaults to the current working directory.
+            log_level: The logging level to use. Options are "DEBUG", "INFO", and "WARNING". Default is from SQRL_LOGGING__LEVEL environment variable or "INFO".
+            log_to_file: Whether to enable logging to file(s) in the "logs/" folder (or a custom folder). Default is from SQRL_LOGGING__TO_FILE environment variable or False.
+            log_format: The format of the log records. Options are "text" and "json". Default is from SQRL_LOGGING__FORMAT environment variable or "text".
         """
-
-        self._logger = self._get_logger(self._filepath, log_file, log_level, log_format)
-
-    def _get_logger(self, base_path: str, log_file: str | None, log_level: str, log_format: str) -> u.Logger:
-        logger = u.Logger(name=uuid4().hex)
-        logger.setLevel(log_level.upper())
-
-        handler = l.StreamHandler()
-        handler.setLevel("WARNING")
-        handler.setFormatter(l.Formatter("%(levelname)s: %(asctime)s - %(message)s"))
-        logger.addHandler(handler)
-
-        if log_format.lower() == "json":
-            formatter = _CustomJsonFormatter()
-        elif log_format.lower() == "text":
-            formatter = l.Formatter("[%(name)s] %(asctime)s - %(levelname)s - %(message)s")
-        else:
-            raise ValueError("log_format must be either 'text' or 'json'")
-
-        if log_file:
-            path = Path(base_path, c.LOGS_FOLDER, log_file)
-            path.parent.mkdir(parents=True, exist_ok=True)
+        project_path = str(Path(project_path).resolve())
 
-
-
-
+        self._project_path = project_path
+        self._env_vars_unformatted = self._load_env_vars(project_path, load_dotenv_globally)
+        self._env_vars = SquirrelsEnvVars(project_path=project_path, **self._env_vars_unformatted)
+        self._vdl_catalog_db_path = self._env_vars.vdl_catalog_db_path
 
-
-
-
-
+        self._logger = self._get_logger(project_path, self._env_vars, log_to_file, log_level, log_format)
+        self._ensure_virtual_datalake_exists(project_path, self._vdl_catalog_db_path, self._env_vars.vdl_data_path)
+
+    @staticmethod
+    def _load_env_vars(project_path: str, load_dotenv_globally: bool) -> dict[str, str]:
         dotenv_files = [c.DOTENV_FILE, c.DOTENV_LOCAL_FILE]
         dotenv_vars = {}
         for file in dotenv_files:
-
+            full_path = u.Path(project_path, file)
+            if load_dotenv_globally:
+                load_dotenv(full_path)
+            dotenv_vars.update({k: v for k, v in dotenv_values(full_path).items() if v is not None})
         return {**os.environ, **dotenv_vars}
 
+    @staticmethod
+    def _get_logger(
+        filepath: str, env_vars: SquirrelsEnvVars, log_to_file: bool, log_level: str | None, log_format: str | None
+    ) -> u.Logger:
+        # CLI arguments take precedence over environment variables
+        log_level = log_level if log_level is not None else env_vars.logging_level
+        log_format = log_format if log_format is not None else env_vars.logging_format
+        log_to_file = env_vars.logging_to_file or log_to_file
+        log_file_size_mb = float(env_vars.logging_file_size_mb)
+        log_file_backup_count = int(env_vars.logging_file_backup_count)
+        return l.get_logger(filepath, log_to_file, log_level, log_format, log_file_size_mb, log_file_backup_count)
+
+    @staticmethod
+    def _ensure_virtual_datalake_exists(project_path: str, vdl_catalog_db_path: str, vdl_data_path: str) -> None:
+        target_path = u.Path(project_path, c.TARGET_FOLDER)
+        target_path.mkdir(parents=True, exist_ok=True)
+
+        # Attempt to set up the virtual data lake with DATA_PATH if possible
+        try:
+            is_ducklake = vdl_catalog_db_path.startswith("ducklake:")
+
+            options = f"(DATA_PATH '{vdl_data_path}')" if is_ducklake else ""
+            attach_stmt = f"ATTACH '{vdl_catalog_db_path}' AS vdl {options}"
+            with duckdb.connect() as conn:
+                conn.execute(attach_stmt)
+                # TODO: support incremental loads for build models and avoid cleaning up old files all the time
+                conn.execute("CALL ducklake_expire_snapshots('vdl', older_than => now())")
+                conn.execute("CALL ducklake_cleanup_old_files('vdl', cleanup_all => true)")
+
+        except Exception as e:
+            if "DATA_PATH parameter" in str(e):
+                first_line = str(e).split("\n")[0]
+                note = "NOTE: Squirrels does not allow changing the data path for an existing Virtual Data Lake (VDL)"
+                raise u.ConfigurationError(f"{first_line}\n\n{note}")
+
+            if is_ducklake and not any(x in vdl_catalog_db_path for x in [":sqlite:", ":postgres:", ":mysql:"]):
+                extended_error = "\n- Note: if you're using DuckDB for the metadata database, only one process can connect to the VDL at a time."
+            else:
+                extended_error = ""
+
+            raise u.ConfigurationError(f"Failed to attach Virtual Data Lake (VDL).{extended_error}") from e
+
     @ft.cached_property
     def _manifest_cfg(self) -> mf.ManifestConfig:
-        return mf.ManifestIO.load_from_file(self._logger, self.
+        return mf.ManifestIO.load_from_file(self._logger, self._project_path, self._env_vars_unformatted)
 
     @ft.cached_property
     def _seeds(self) -> s.Seeds:
-        return s.SeedsIO.load_files(self._logger, self.
+        return s.SeedsIO.load_files(self._logger, self._env_vars)
 
     @ft.cached_property
     def _sources(self) -> so.Sources:
-        return so.SourcesIO.load_file(self._logger, self.
+        return so.SourcesIO.load_file(self._logger, self._env_vars, self._env_vars_unformatted)
 
     @ft.cached_property
     def _build_model_files(self) -> dict[str, mq.QueryFileWithConfig]:
-        return m.ModelsIO.load_build_files(self._logger, self.
+        return m.ModelsIO.load_build_files(self._logger, self._env_vars)
 
     @ft.cached_property
     def _dbview_model_files(self) -> dict[str, mq.QueryFileWithConfig]:
-        return m.ModelsIO.load_dbview_files(self._logger, self.
+        return m.ModelsIO.load_dbview_files(self._logger, self._env_vars)
 
     @ft.cached_property
     def _federate_model_files(self) -> dict[str, mq.QueryFileWithConfig]:
-        return m.ModelsIO.load_federate_files(self._logger, self.
+        return m.ModelsIO.load_federate_files(self._logger, self._env_vars)
 
     @ft.cached_property
     def _context_func(self) -> m.ContextFunc:
-        return m.ModelsIO.load_context_func(self._logger, self.
+        return m.ModelsIO.load_context_func(self._logger, self._project_path)
 
     @ft.cached_property
     def _dashboards(self) -> dict[str, d.DashboardDefinition]:
-        return d.DashboardsIO.load_files(
+        return d.DashboardsIO.load_files(
+            self._logger, self._project_path, self._manifest_cfg.project_variables.auth_type, self._manifest_cfg.configurables
+        )
 
     @ft.cached_property
     def _conn_args(self) -> cs.ConnectionsArgs:
-
+        proj_vars = self._manifest_cfg.project_variables.model_dump()
+        conn_args = cs.ConnectionsArgs(self._project_path, proj_vars, self._env_vars_unformatted)
+        return conn_args
 
     @ft.cached_property
     def _conn_set(self) -> cs.ConnectionSet:
-        return cs.ConnectionSetIO.load_from_file(self._logger, self.
+        return cs.ConnectionSetIO.load_from_file(self._logger, self._project_path, self._manifest_cfg, self._conn_args)
+
+    @ft.cached_property
+    def _custom_user_fields_cls_and_provider_functions(self) -> tuple[type[CustomUserFields], list[ProviderFunctionType]]:
+        user_module_path = u.Path(self._project_path, c.PYCONFIGS_FOLDER, c.USER_FILE)
+        user_module = PyModule(user_module_path, self._project_path)
+
+        # Load CustomUserFields class (adds to Authenticator.providers as side effect)
+        CustomUserFieldsCls = user_module.get_func_or_class("CustomUserFields", default_attr=CustomUserFields)
+        provider_functions = Authenticator.providers
+        Authenticator.providers = []
+
+        if not issubclass(CustomUserFieldsCls, CustomUserFields):
+            raise ConfigurationError(f"CustomUserFields class in '{c.USER_FILE}' must inherit from CustomUserFields")
+
+        return CustomUserFieldsCls, provider_functions
 
     @ft.cached_property
     def _auth(self) -> Authenticator:
-
+        auth_args = AuthProviderArgs(**self._conn_args.__dict__)
+        CustomUserFieldsCls, provider_functions = self._custom_user_fields_cls_and_provider_functions
+        external_only = (self._manifest_cfg.project_variables.auth_strategy == mf.AuthStrategy.EXTERNAL)
+
+        if external_only and len(provider_functions) != 1:
+            raise ConfigurationError(f"When auth_strategy is 'external', there must be exactly one auth provider function. Found {len(provider_functions)} auth providers.")
+
+        return Authenticator(
+            self._logger, self._env_vars, auth_args, provider_functions,
+            custom_user_fields_cls=CustomUserFieldsCls, external_only=external_only
+        )
 
     @ft.cached_property
-    def
-
+    def _guest_user(self) -> AbstractUser:
+        custom_fields = self._auth.CustomUserFields()
+        return GuestUser(username="", custom_fields=custom_fields)
+
+    @ft.cached_property
+    def _admin_user(self) -> AbstractUser:
+        custom_fields = self._auth.CustomUserFields()
+        return RegisteredUser(username="", access_level="admin", custom_fields=custom_fields)
 
     @ft.cached_property
     def _param_args(self) -> ps.ParametersArgs:
-
+        conn_args = self._conn_args
+        return ps.ParametersArgs(**conn_args.__dict__)
 
     @ft.cached_property
     def _param_cfg_set(self) -> ps.ParameterConfigsSet:
         return ps.ParameterConfigsSetIO.load_from_file(
-            self._logger, self.
+            self._logger, self._env_vars, self._manifest_cfg, self._seeds, self._conn_set, self._param_args
         )
 
     @ft.cached_property
     def _j2_env(self) -> u.EnvironmentWithMacros:
-        env = u.EnvironmentWithMacros(self._logger, loader=u.j2.FileSystemLoader(self.
+        env = u.EnvironmentWithMacros(self._logger, loader=u.j2.FileSystemLoader(self._project_path))
 
         def value_to_str(value: t.Any, attribute: str | None = None) -> str:
             if attribute is None:
```
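The new constructor wires logging through environment variables, with the `_get_logger` helper above combining them with the constructor arguments (`log_level` and `log_format` arguments override the environment; `log_to_file` is OR-combined with it). A minimal sketch of how this might be used — the import path is an assumption, and the values set below are hypothetical; only the `SQRL_LOGGING__*` names come from the docstring in the diff:

```python
import os

# Assumed import path; the diff does not show how SquirrelsProject is exported.
from squirrels import SquirrelsProject

# Environment variables supply logging defaults (per the docstring above)
os.environ["SQRL_LOGGING__LEVEL"] = "DEBUG"
os.environ["SQRL_LOGGING__FORMAT"] = "json"

project = SquirrelsProject(
    project_path=".",   # resolved to an absolute path internally
    log_to_file=True,   # OR-combined with SQRL_LOGGING__TO_FILE
    log_level="INFO",   # explicit argument overrides SQRL_LOGGING__LEVEL
)
```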
```diff
@@ -170,11 +221,26 @@ class SquirrelsProject:
         env.filters["quote_and_join"] = quote_and_join
         return env
 
-
-
-
-
-
+    def get_fastapi_components(
+        self, *, no_cache: bool = False, host: str = "localhost", port: int = 8000,
+        mount_path_format: str = "/analytics/{project_name}/v{project_version}"
+    ) -> "FastAPIComponents":
+        """
+        Get the FastAPI components for the Squirrels project including mount path, lifespan, and FastAPI app.
+
+        Arguments:
+            no_cache: Whether to disable caching for parameter options, datasets, and dashboard results in the API server.
+            host: The host the API server will listen on. Only used for the welcome banner.
+            port: The port the API server will listen on. Only used for the welcome banner.
+            mount_path_format: The format of the mount path. Use {project_name} and {project_version} as placeholders.
+
+        Returns:
+            A FastAPIComponents object containing the mount path, lifespan, and FastAPI app.
+        """
+        from ._api_server import ApiServer
+        api_server = ApiServer(no_cache=no_cache, project=self)
+        return api_server.get_fastapi_components(host=host, port=port, mount_path_format=mount_path_format)
+
     def close(self) -> None:
         """
         Deliberately close any open resources within the Squirrels project, such as database connections (instead of relying on the garbage collector).
```
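`get_fastapi_components` means a project can now hand its API surface to a host ASGI application rather than running a server itself. A sketch under stated assumptions — the attribute names `lifespan`, `mount_path`, and `app` on `FastAPIComponents` are guesses from the docstring's "mount path, lifespan, and FastAPI app", and the import path is assumed:

```python
import uvicorn
from fastapi import FastAPI

from squirrels import SquirrelsProject  # assumed import path

project = SquirrelsProject(project_path=".")
components = project.get_fastapi_components(host="0.0.0.0", port=8080)

# Hypothetical attribute names; the diff only documents that the object
# carries a mount path, a lifespan, and a FastAPI app.
root = FastAPI(lifespan=components.lifespan)
root.mount(components.mount_path, components.app)

if __name__ == "__main__":
    uvicorn.run(root, host="0.0.0.0", port=8080)
```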
```diff
@@ -182,6 +248,9 @@ class SquirrelsProject:
         self._conn_set.dispose()
         self._auth.close()
 
+    def __enter__(self):
+        return self
+
     def __exit__(self, exc_type, exc_val, traceback):
         self.close()
 
```
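With `__enter__` added alongside the existing `__exit__`, a project can be used as a context manager so `close()` runs even when an exception escapes. For example (import path assumed; the seed name mirrors `seed_categories` from the base project files listed above):

```python
from squirrels import SquirrelsProject  # assumed import path

with SquirrelsProject(project_path=".") as project:
    # seed() returns a polars LazyFrame; collect() materializes it
    categories = project.seed("seed_categories").collect()
    print(categories)
# on exit, close() disposes database connections and the authenticator
```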
```diff
@@ -197,60 +266,59 @@ class SquirrelsProject:
 
         seeds_dict = self._seeds.get_dataframes()
         for key, seed in seeds_dict.items():
-            self._add_model(models_dict, m.Seed(key, seed.config, seed.df, logger=self._logger,
+            self._add_model(models_dict, m.Seed(key, seed.config, seed.df, logger=self._logger, conn_set=self._conn_set))
 
         for source_name, source_config in self._sources.sources.items():
-            self._add_model(models_dict, m.SourceModel(source_name, source_config, logger=self._logger,
+            self._add_model(models_dict, m.SourceModel(source_name, source_config, logger=self._logger, conn_set=self._conn_set))
 
         for name, val in self._build_model_files.items():
-            model = m.BuildModel(name, val.config, val.query_file, logger=self._logger,
+            model = m.BuildModel(name, val.config, val.query_file, logger=self._logger, conn_set=self._conn_set, j2_env=self._j2_env)
             self._add_model(models_dict, model)
 
         return models_dict
 
 
-    async def build(self, *, full_refresh: bool = False, select: str | None = None
+    async def build(self, *, full_refresh: bool = False, select: str | None = None) -> None:
         """
-        Build the
+        Build the Virtual Data Lake (VDL) for the Squirrels project
 
         Arguments:
-            full_refresh: Whether to drop all tables and rebuild the
-
+            full_refresh: Whether to drop all tables and rebuild the VDL from scratch. Default is False.
+            select: The name of a specific model to build. If None, all models are built. Default is None.
         """
         models_dict: dict[str, m.StaticModel] = self._get_static_models()
-        builder = ModelBuilder(self.
-        await builder.build(full_refresh, select
+        builder = ModelBuilder(self._vdl_catalog_db_path, self._conn_set, models_dict, self._conn_args, self._logger)
+        await builder.build(full_refresh, select)
 
     def _get_models_dict(self, always_python_df: bool) -> dict[str, m.DataModel]:
-        models_dict: dict[str, m.DataModel] =
+        models_dict: dict[str, m.DataModel] = self._get_static_models()
 
         for name, val in self._dbview_model_files.items():
             self._add_model(models_dict, m.DbviewModel(
-                name, val.config, val.query_file, logger=self._logger,
+                name, val.config, val.query_file, logger=self._logger, conn_set=self._conn_set, j2_env=self._j2_env
             ))
             models_dict[name].needs_python_df = always_python_df
 
         for name, val in self._federate_model_files.items():
             self._add_model(models_dict, m.FederateModel(
-                name, val.config, val.query_file, logger=self._logger,
+                name, val.config, val.query_file, logger=self._logger, conn_set=self._conn_set, j2_env=self._j2_env
             ))
             models_dict[name].needs_python_df = always_python_df
 
         return models_dict
 
-    def _generate_dag(self, dataset: str
-        models_dict = self._get_models_dict(always_python_df)
+    def _generate_dag(self, dataset: str) -> m.DAG:
+        models_dict = self._get_models_dict(always_python_df=False)
 
         dataset_config = self._manifest_cfg.datasets[dataset]
-
-        target_model = models_dict[target_model_name]
+        target_model = models_dict[dataset_config.model]
         target_model.is_target = True
-        dag = m.DAG(dataset_config, target_model, models_dict, self.
+        dag = m.DAG(dataset_config, target_model, models_dict, self._vdl_catalog_db_path, self._logger)
 
         return dag
 
-    def _generate_dag_with_fake_target(self, sql_query: str | None) -> m.DAG:
-        models_dict = self._get_models_dict(always_python_df=
+    def _generate_dag_with_fake_target(self, sql_query: str | None, *, always_python_df: bool = False) -> m.DAG:
+        models_dict = self._get_models_dict(always_python_df=always_python_df)
 
         if sql_query is None:
             dependencies = set(models_dict.keys())
```
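`build()` is a coroutine, so scripts drive it through an event loop. A minimal sketch (import path assumed; the model name mirrors `build_example` from the base project):

```python
import asyncio

from squirrels import SquirrelsProject  # assumed import path

async def main() -> None:
    with SquirrelsProject(project_path=".") as project:
        # Drop all tables in the VDL and rebuild from scratch
        await project.build(full_refresh=True)
        # Or rebuild a single model and leave the rest untouched:
        # await project.build(select="build_example")

asyncio.run(main())
```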
```diff
@@ -260,227 +328,260 @@ class SquirrelsProject:
             substitutions = {}
             for model_name in dependencies:
                 model = models_dict[model_name]
-                if isinstance(model, m.SourceModel) and not model.
-                    raise InvalidInputError(
-                if isinstance(model,
-                    substitutions[model_name] = f"
+                if isinstance(model, m.SourceModel) and not model.is_queryable:
+                    raise InvalidInputError(400, "cannot_query_source_model", f"Source model '{model_name}' cannot be queried with DuckDB")
+                if isinstance(model, m.BuildModel):
+                    substitutions[model_name] = f"vdl.{model_name}"
+                elif isinstance(model, m.SourceModel):
+                    if model.model_config.load_to_vdl:
+                        substitutions[model_name] = f"vdl.{model_name}"
+                    else:
+                        # DuckDB connection without load_to_vdl - reference via attached database
+                        conn_name = model.model_config.get_connection()
+                        table_name = model.model_config.get_table()
+                        substitutions[model_name] = f"db_{conn_name}.{table_name}"
 
             sql_query = parsed.transform(
-                lambda node: sqlglot.expressions.Table(this=substitutions[node.name])
+                lambda node: sqlglot.expressions.Table(this=substitutions[node.name], alias=node.alias)
                 if isinstance(node, sqlglot.expressions.Table) and node.name in substitutions
                 else node
             ).sql()
 
         model_config = mc.FederateModelConfig(depends_on=dependencies)
-        query_file = mq.SqlQueryFile("", sql_query or "")
+        query_file = mq.SqlQueryFile("", sql_query or "SELECT 1")
         fake_target_model = m.FederateModel(
-            "__fake_target", model_config, query_file, logger=self._logger,
+            "__fake_target", model_config, query_file, logger=self._logger, conn_set=self._conn_set, j2_env=self._j2_env
         )
         fake_target_model.is_target = True
-        dag = m.DAG(None, fake_target_model, models_dict, self.
+        dag = m.DAG(None, fake_target_model, models_dict, self._vdl_catalog_db_path, self._logger)
         return dag
 
-    def
-
-
-
-
-
-        G = dag.to_networkx_graph()
-
-        fig, _ = plt.subplots()
-        pos = nx.multipartite_layout(G, subset_key="layer")
-        colors = [color_map[node[1]] for node in G.nodes(data="model_type")] # type: ignore
-        nx.draw(G, pos=pos, node_shape='^', node_size=1000, node_color=colors, arrowsize=20)
-
-        y_values = [val[1] for val in pos.values()]
-        scale = max(y_values) - min(y_values) if len(y_values) > 0 else 0
-        label_pos = {key: (val[0], val[1]-0.002-0.1*scale) for key, val in pos.items()}
-        nx.draw_networkx_labels(G, pos=label_pos, font_size=8)
-
-        fig.tight_layout()
-        plt.margins(x=0.1, y=0.1)
-        fig.savefig(Path(output_folder, "dag.png"))
-        plt.close(fig)
-
-    async def _get_compiled_dag(self, *, sql_query: str | None = None, selections: dict[str, t.Any] = {}, user: BaseUser | None = None) -> m.DAG:
-        dag = self._generate_dag_with_fake_target(sql_query)
+    async def _get_compiled_dag(
+        self, user: AbstractUser, *, sql_query: str | None = None, selections: dict[str, t.Any] = {}, configurables: dict[str, str] = {},
+        always_python_df: bool = False
+    ) -> m.DAG:
+        dag = self._generate_dag_with_fake_target(sql_query, always_python_df=always_python_df)
 
-
-        await dag.execute(
+        configurables = {**self._manifest_cfg.get_default_configurables(), **configurables}
+        await dag.execute(
+            self._param_args, self._param_cfg_set, self._context_func, user, selections,
+            runquery=False, configurables=configurables
+        )
         return dag
 
-    def _get_all_connections(self) -> list[
+    def _get_all_connections(self) -> list[rm.ConnectionItemModel]:
         connections = []
         for conn_name, conn_props in self._conn_set.get_connections_as_dict().items():
             if isinstance(conn_props, mf.ConnectionProperties):
                 label = conn_props.label if conn_props.label is not None else conn_name
-                connections.append(
+                connections.append(rm.ConnectionItemModel(name=conn_name, label=label))
         return connections
 
-    def _get_all_data_models(self, compiled_dag: m.DAG) -> list[
+    def _get_all_data_models(self, compiled_dag: m.DAG) -> list[rm.DataModelItem]:
         return compiled_dag.get_all_data_models()
 
-    async def get_all_data_models(self) -> list[
+    async def get_all_data_models(self) -> list[rm.DataModelItem]:
         """
         Get all data models in the project
 
         Returns:
             A list of DataModelItem objects
         """
-        compiled_dag = await self._get_compiled_dag()
+        compiled_dag = await self._get_compiled_dag(self._admin_user)
         return self._get_all_data_models(compiled_dag)
 
-    def _get_all_data_lineage(self, compiled_dag: m.DAG) -> list[
+    def _get_all_data_lineage(self, compiled_dag: m.DAG) -> list[rm.LineageRelation]:
         all_lineage = compiled_dag.get_all_model_lineage()
 
         # Add dataset nodes to the lineage
         for dataset in self._manifest_cfg.datasets.values():
-            target_dataset =
-            source_model =
-            all_lineage.append(
+            target_dataset = rm.LineageNode(name=dataset.name, type="dataset")
+            source_model = rm.LineageNode(name=dataset.model, type="model")
+            all_lineage.append(rm.LineageRelation(type="runtime", source=source_model, target=target_dataset))
 
         # Add dashboard nodes to the lineage
         for dashboard in self._dashboards.values():
-            target_dashboard =
+            target_dashboard = rm.LineageNode(name=dashboard.dashboard_name, type="dashboard")
             datasets = set(x.dataset for x in dashboard.config.depends_on)
             for dataset in datasets:
-                source_dataset =
-                all_lineage.append(
+                source_dataset = rm.LineageNode(name=dataset, type="dataset")
+                all_lineage.append(rm.LineageRelation(type="runtime", source=source_dataset, target=target_dashboard))
 
         return all_lineage
 
-    async def get_all_data_lineage(self) -> list[
+    async def get_all_data_lineage(self) -> list[rm.LineageRelation]:
         """
         Get all data lineage in the project
 
         Returns:
             A list of LineageRelation objects
         """
-        compiled_dag = await self._get_compiled_dag()
+        compiled_dag = await self._get_compiled_dag(self._admin_user)
         return self._get_all_data_lineage(compiled_dag)
 
-    async def _write_dataset_outputs_given_test_set(
-        self, dataset: str, select: str, test_set: str | None, runquery: bool, recurse: bool
-    ) -> t.Any | None:
-        dataset_conf = self._manifest_cfg.datasets[dataset]
-        default_test_set_conf = self._manifest_cfg.get_default_test_set(dataset)
-        if test_set in self._manifest_cfg.selection_test_sets:
-            test_set_conf = self._manifest_cfg.selection_test_sets[test_set]
-        elif test_set is None or test_set == default_test_set_conf.name:
-            test_set, test_set_conf = default_test_set_conf.name, default_test_set_conf
-        else:
-            raise ConfigurationError(f"No test set named '{test_set}' was found when compiling dataset '{dataset}'. The test set must be defined if not default for dataset.")
-
-        error_msg_intro = f"Cannot compile dataset '{dataset}' with test set '{test_set}'."
-        if test_set_conf.datasets is not None and dataset not in test_set_conf.datasets:
-            raise ConfigurationError(f"{error_msg_intro}\n Applicable datasets for test set '{test_set}' does not include dataset '{dataset}'.")
-
-        user_attributes = test_set_conf.user_attributes.copy() if test_set_conf.user_attributes is not None else {}
-        selections = test_set_conf.parameters.copy()
-        username, is_admin = user_attributes.pop("username", ""), user_attributes.pop("is_admin", False)
-        if test_set_conf.is_authenticated:
-            user = self._auth.User(username=username, is_admin=is_admin, **user_attributes)
-        elif dataset_conf.scope == mf.PermissionScope.PUBLIC:
-            user = None
-        else:
-            raise ConfigurationError(f"{error_msg_intro}\n Non-public datasets require a test set with 'user_attributes' section defined")
-
-        if dataset_conf.scope == mf.PermissionScope.PRIVATE and not is_admin:
-            raise ConfigurationError(f"{error_msg_intro}\n Private datasets require a test set with user_attribute 'is_admin' set to true")
-
-        # always_python_df is set to True for creating CSV files from results (when runquery is True)
-        dag = self._generate_dag(dataset, target_model_name=select, always_python_df=runquery)
-        await dag.execute(
-            self._param_args, self._param_cfg_set, self._context_func, user, selections,
-            runquery=runquery, recurse=recurse, default_traits=self._manifest_cfg.get_default_traits()
-        )
-
-        output_folder = Path(self._filepath, c.TARGET_FOLDER, c.COMPILE_FOLDER, dataset, test_set)
-        if output_folder.exists():
-            shutil.rmtree(output_folder)
-        output_folder.mkdir(parents=True, exist_ok=True)
-
-        def write_placeholders() -> None:
-            output_filepath = Path(output_folder, "placeholders.json")
-            with open(output_filepath, 'w') as f:
-                json.dump(dag.placeholders, f, indent=4)
-
-        def write_model_outputs(model: m.DataModel) -> None:
-            assert isinstance(model, m.QueryModel)
-            subfolder = c.DBVIEWS_FOLDER if model.model_type == m.ModelType.DBVIEW else c.FEDERATES_FOLDER
-            subpath = Path(output_folder, subfolder)
-            subpath.mkdir(parents=True, exist_ok=True)
-            if isinstance(model.compiled_query, mq.SqlModelQuery):
-                output_filepath = Path(subpath, model.name+'.sql')
-                query = model.compiled_query.query
-                with open(output_filepath, 'w') as f:
-                    f.write(query)
-            if runquery and isinstance(model.result, pl.LazyFrame):
-                output_filepath = Path(subpath, model.name+'.csv')
-                model.result.collect().write_csv(output_filepath)
-
-        write_placeholders()
-        all_model_names = dag.get_all_query_models()
-        coroutines = [asyncio.to_thread(write_model_outputs, dag.models_dict[name]) for name in all_model_names]
-        await u.asyncio_gather(coroutines)
-
-        if recurse:
-            self._draw_dag(dag, output_folder)
-
-        if isinstance(dag.target_model, m.QueryModel) and dag.target_model.compiled_query is not None:
-            return dag.target_model.compiled_query.query
-
     async def compile(
-        self, *,
-
+        self, *, selected_model: str | None = None, test_set: str | None = None, do_all_test_sets: bool = False,
+        runquery: bool = False, clear: bool = False, buildtime_only: bool = False, runtime_only: bool = False
     ) -> None:
         """
-
+        Compile models into the "target/compile" folder.
 
-
+        Behavior:
+        - Buildtime outputs: target/compile/buildtime/*.sql (for SQL build models) and dag.png
+        - Runtime outputs: target/compile/runtime/[test_set]/dbviews/*.sql, federates/*.sql, dag.png
+          If runquery=True, also write CSVs for runtime models.
+        - Options: clear entire compile folder first; compile only buildtime or only runtime.
 
         Arguments:
-            dataset: The name of the dataset to compile. Ignored if "do_all_datasets" argument is True, but required (i.e., cannot be None) if "do_all_datasets" is False. Default is None.
-            do_all_datasets: If True, compile all datasets and ignore the "dataset" argument. Default is False.
             selected_model: The name of the model to compile. If specified, the compiled SQL query is also printed in the terminal. If None, all models for the selected dataset are compiled. Default is None.
             test_set: The name of the test set to compile with. If None, the default test set is used (which can vary by dataset). Ignored if `do_all_test_sets` argument is True. Default is None.
             do_all_test_sets: Whether to compile all applicable test sets for the selected dataset(s). If True, the `test_set` argument is ignored. Default is False.
-            runquery
+            runquery: Whether to run all compiled queries and save each result as a CSV file. If True and `selected_model` is specified, all upstream models of the selected model is compiled as well. Default is False.
+            clear: Whether to clear the "target/compile/" folder before compiling. Default is False.
+            buildtime_only: Whether to compile only buildtime models. Default is False.
+            runtime_only: Whether to compile only runtime models. Default is False.
         """
-
-
-
-
-
-
-
-
-
-
-
+        border = "=" * 80
+        underlines = "-" * len(border)
+
+        compile_root = Path(self._project_path, c.TARGET_FOLDER, c.COMPILE_FOLDER)
+        if clear and compile_root.exists():
+            shutil.rmtree(compile_root)
+
+        models_dict = self._get_models_dict(always_python_df=False)
+
+        if selected_model is not None:
+            selected_model = u.normalize_name(selected_model)
+            if selected_model not in models_dict:
+                print(f"No such model found: {selected_model}")
+                return
+            if not isinstance(models_dict[selected_model], m.QueryModel):
+                print(f"Model '{selected_model}' is not a query model. Nothing to do.")
+                return
 
-
-
-
-
-
-
+        model_to_compile = None
+
+        # Buildtime compilation
+        if not runtime_only:
+            print(underlines)
+            print(f"Compiling buildtime models")
+            print(underlines)
+
+            buildtime_folder = Path(compile_root, c.COMPILE_BUILDTIME_FOLDER)
+            buildtime_folder.mkdir(parents=True, exist_ok=True)
+
+            def write_buildtime_model(model: m.DataModel, static_models: dict[str, m.StaticModel]) -> None:
+                if not isinstance(model, m.BuildModel):
+                    return
+
+                model.compile_for_build(self._conn_args, static_models)
+
+                if isinstance(model.compiled_query, mq.SqlModelQuery):
+                    out_path = Path(buildtime_folder, f"{model.name}.sql")
+                    with open(out_path, 'w') as f:
+                        f.write(model.compiled_query.query)
+                    print(f"Successfully compiled build model: {model.name}")
+                elif isinstance(model.compiled_query, mq.PyModelQuery):
+                    print(f"The build model '{model.name}' is in Python. Compilation for Python is not supported yet.")
+
+            static_models = self._get_static_models()
+            if selected_model is not None:
+                model_to_compile = models_dict[selected_model]
+                write_buildtime_model(model_to_compile, static_models)
+            else:
+                coros = [asyncio.to_thread(write_buildtime_model, m, static_models) for m in static_models.values()]
+                await u.asyncio_gather(coros)
 
-
-
-
-        queries = await u.asyncio_gather(coroutines)
+            print(underlines)
+            print()
 
-
-
-
-
+        # Runtime compilation
+        if not buildtime_only:
+            if do_all_test_sets:
+                test_set_names_set = set(self._manifest_cfg.selection_test_sets.keys())
+                test_set_names_set.add(c.DEFAULT_TEST_SET_NAME)
+                test_set_names = list(test_set_names_set)
+            else:
+                test_set_names = [test_set or c.DEFAULT_TEST_SET_NAME]
+
+            for ts_name in test_set_names:
+                print(underlines)
+                print(f"Compiling runtime models (test set '{ts_name}')")
+                print(underlines)
+
+                # Build user and selections from test set config if present
+                ts_conf = self._manifest_cfg.selection_test_sets.get(ts_name, self._manifest_cfg.get_default_test_set())
+                # Separate base fields from custom fields
+                access_level = ts_conf.user.access_level
+                custom_fields = self._auth.CustomUserFields(**ts_conf.user.custom_fields)
+                if access_level == "guest":
+                    user = GuestUser(username="", custom_fields=custom_fields)
+                else:
+                    user = RegisteredUser(username="", access_level=access_level, custom_fields=custom_fields)
+
+                # Generate DAG across all models. When runquery=True, force models to produce Python dataframes so CSVs can be written.
+                dag = await self._get_compiled_dag(
+                    user=user, selections=ts_conf.parameters, configurables=ts_conf.configurables, always_python_df=runquery,
+                )
+                if runquery:
+                    await dag._run_models()
+
+                # Prepare output folders
+                runtime_folder = Path(compile_root, c.COMPILE_RUNTIME_FOLDER, ts_name)
+                dbviews_folder = Path(runtime_folder, c.DBVIEWS_FOLDER)
+                federates_folder = Path(runtime_folder, c.FEDERATES_FOLDER)
+                dbviews_folder.mkdir(parents=True, exist_ok=True)
+                federates_folder.mkdir(parents=True, exist_ok=True)
+                with open(Path(runtime_folder, "placeholders.json"), "w") as f:
+                    json.dump(dag.placeholders, f)
+
+                # Function to write runtime models
+                def write_runtime_model(model: m.DataModel) -> None:
+                    if not isinstance(model, m.QueryModel):
+                        return
+
+                    if model.model_type not in (m.ModelType.DBVIEW, m.ModelType.FEDERATE):
+                        return
+
+                    subfolder = dbviews_folder if model.model_type == m.ModelType.DBVIEW else federates_folder
+                    model_type = "dbview" if model.model_type == m.ModelType.DBVIEW else "federate"
+
+                    if isinstance(model.compiled_query, mq.SqlModelQuery):
+                        out_sql = Path(subfolder, f"{model.name}.sql")
+                        with open(out_sql, 'w') as f:
+                            f.write(model.compiled_query.query)
+                        print(f"Successfully compiled {model_type} model: {model.name}")
+                    elif isinstance(model.compiled_query, mq.PyModelQuery):
+                        print(f"The {model_type} model '{model.name}' is in Python. Compilation for Python is not supported yet.")
+
+                    if runquery and isinstance(model.result, pl.LazyFrame):
+                        out_csv = Path(subfolder, f"{model.name}.csv")
+                        model.result.collect().write_csv(out_csv)
+                        print(f"Successfully created CSV for {model_type} model: {model.name}")
+
+                # If selected_model is provided for runtime, only emit that model's outputs
+                if selected_model is not None:
+                    model_to_compile = dag.models_dict[selected_model]
+                    write_runtime_model(model_to_compile)
+                else:
+                    coros = [asyncio.to_thread(write_runtime_model, model) for model in dag.models_dict.values()]
+                    await u.asyncio_gather(coros)
+
+                print(underlines)
+                print()
+
+        print(f"All compilations complete! See the '{c.TARGET_FOLDER}/{c.COMPILE_FOLDER}/' folder for results.")
+        if model_to_compile and isinstance(model_to_compile, m.QueryModel) and isinstance(model_to_compile.compiled_query, mq.SqlModelQuery):
+            print()
+            print(border)
+            print(f"Compiled SQL query for model '{model_to_compile.name}':")
+            print(underlines)
+            print(model_to_compile.compiled_query.query)
+            print(border)
         print()
 
-    def _permission_error(self, user:
-
-        return InvalidInputError(25, f"User{username} does not have permission to access {scope} {data_type}: {data_name}")
+    def _permission_error(self, user: AbstractUser, data_type: str, data_name: str, scope: str) -> InvalidInputError:
+        return InvalidInputError(403, f"unauthorized_access_to_{data_type}", f"User '{user}' does not have permission to access {scope} {data_type}: {data_name}")
 
     def seed(self, name: str) -> pl.LazyFrame:
         """
```
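The reworked `compile()` splits outputs into buildtime and runtime folders and selects test sets rather than datasets. A sketch of driving it from a script (import path assumed; the model name is illustrative, mirroring `federate_example` from the base project):

```python
import asyncio

from squirrels import SquirrelsProject  # assumed import path

async def main() -> None:
    with SquirrelsProject(project_path=".") as project:
        # Clear target/compile/ and write compiled SQL for all models
        await project.compile(clear=True)
        # Compile one model, run its query, and save the result as a CSV;
        # the compiled SQL is also printed to the terminal
        await project.compile(selected_model="federate_example", runquery=True)

asyncio.run(main())
```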
```diff
@@ -515,37 +616,77 @@ class SquirrelsProject:
             target_model_config=dag.target_model.model_config
         )
 
-
-        self, name: str, *, selections: dict[str, t.Any] = {}, user: BaseUser | None = None, require_auth: bool = True
-    ) -> dr.DatasetResult:
+    def _enforce_max_result_rows(self, lazy_df: pl.LazyFrame, error_type: str) -> pl.DataFrame:
         """
-
-
+        Collect at most max_rows + 1 rows from a LazyFrame to detect overflow.
+        Raises InvalidInputError if the result exceeds the maximum allowed rows.
+
         Arguments:
-
-
-            user: The user to use for authentication. If None, no user is used. Optional, default is None.
+            lazy_df: The LazyFrame to collect and check
+            error_type: Either "dataset" or "query" to customize the error message
 
         Returns:
-            A
+            A DataFrame with at most max_rows rows (or raises if exceeded)
         """
+        max_rows = self._env_vars.datasets_max_rows_output
+        # Collect max_rows + 1 to detect overflow without loading unbounded results
+        collected = lazy_df.limit(max_rows + 1).collect()
+        row_count = collected.select(pl.len()).item()
+
+        if row_count > max_rows:
+            raise InvalidInputError(
+                413, f"{error_type}_result_too_large",
+                f"The {error_type} result contains {row_count} rows, which exceeds the maximum allowed of {max_rows} rows."
+            )
+
+        return collected
+
+    async def _dataset_result(
+        self, name: str, *, selections: dict[str, t.Any] = {}, user: AbstractUser | None = None,
+        configurables: dict[str, str] = {}, check_user_access: bool = True
+    ) -> dr.DatasetResult:
+        if user is None:
+            user = self._guest_user
+
         scope = self._manifest_cfg.datasets[name].scope
-        if
+        if check_user_access and not self._auth.can_user_access_scope(user, scope):
             raise self._permission_error(user, "dataset", name, scope.name)
 
+        dataset_config = self._manifest_cfg.datasets[name]
+        configurables = {**self._manifest_cfg.get_default_configurables(overrides=dataset_config.configurables), **configurables}
+
         dag = self._generate_dag(name)
         await dag.execute(
-            self._param_args, self._param_cfg_set, self._context_func, user, dict(selections),
-            default_traits=self._manifest_cfg.get_default_traits()
+            self._param_args, self._param_cfg_set, self._context_func, user, dict(selections), configurables=configurables
         )
         assert isinstance(dag.target_model.result, pl.LazyFrame)
+        df = self._enforce_max_result_rows(dag.target_model.result, "dataset")
         return dr.DatasetResult(
             target_model_config=dag.target_model.model_config,
-            df=
+            df=df.with_row_index("_row_num", offset=1)
         )
 
+    async def dataset_result(
+        self, name: str, *, selections: dict[str, t.Any] = {}, user: AbstractUser | None = None, configurables: dict[str, str] = {}
+    ) -> dr.DatasetResult:
+        """
+        Async method to retrieve a dataset as a DatasetResult object (with metadata) given parameter selections.
+
+        Arguments:
+            name: The name of the dataset to retrieve.
+            selections: A dictionary of parameter selections to apply to the dataset. Optional, default is empty dictionary.
+            user: The user to use for authentication. If None, no user is used. Optional, default is None.
+            configurables: A dictionary of configurables to apply to the dataset. Optional, default is empty dictionary.
+
+        Returns:
+            A DatasetResult object containing the dataset result (as a polars DataFrame), its description, and the column details.
+        """
+        result = await self._dataset_result(name, selections=selections, user=user, configurables=configurables, check_user_access=False)
+        return result
+
     async def dashboard(
-        self, name: str, *, selections: dict[str, t.Any] = {}, user:
+        self, name: str, *, selections: dict[str, t.Any] = {}, user: AbstractUser | None = None, dashboard_type: t.Type[T] = d.PngDashboard,
+        configurables: dict[str, str] = {}
     ) -> T:
         """
         Async method to retrieve a dashboard given parameter selections.
```
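`dataset_result()` is the new public entry point; the underscored variant keeps the scope check for callers like the API server, and both cap results via `_enforce_max_result_rows`. A sketch (import path assumed; dataset and parameter names are illustrative):

```python
import asyncio

from squirrels import SquirrelsProject  # assumed import path

async def main() -> None:
    with SquirrelsProject(project_path=".") as project:
        result = await project.dataset_result(
            "dataset_example",              # illustrative dataset name
            selections={"group_by": "g0"},  # illustrative parameter selection
        )
        # result.df is a polars DataFrame capped at the configured row limit,
        # with a 1-based "_row_num" index column added
        print(result.df)

asyncio.run(main())
```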
```diff
@@ -559,28 +700,97 @@ class SquirrelsProject:
         Returns:
             The dashboard type specified by the "dashboard_type" argument.
         """
+        if user is None:
+            user = self._guest_user
+
         scope = self._dashboards[name].config.scope
         if not self._auth.can_user_access_scope(user, scope):
             raise self._permission_error(user, "dashboard", name, scope.name)
 
         async def get_dataset_df(dataset_name: str, fixed_params: dict[str, t.Any]) -> pl.DataFrame:
             final_selections = {**selections, **fixed_params}
-            result = await self.
+            result = await self.dataset_result(
+                dataset_name, selections=final_selections, user=user, configurables=configurables
+            )
             return result.df
 
-
+        dashboard_config = self._dashboards[name].config
+        parameter_set = self._param_cfg_set.apply_selections(dashboard_config.parameters, selections, user)
+        prms = parameter_set.get_parameters_as_dict()
+
+        configurables = {**self._manifest_cfg.get_default_configurables(overrides=dashboard_config.configurables), **configurables}
+        context = {}
+        ctx_args = m.ContextArgs(
+            **self._param_args.__dict__, user=user, prms=prms, configurables=configurables, _conn_args=self._conn_args
+        )
+        self._context_func(context, ctx_args)
+
+        args = d.DashboardArgs(
+            **ctx_args.__dict__, ctx=context, _get_dataset=get_dataset_df
+        )
         try:
             return await self._dashboards[name].get_dashboard(args, dashboard_type=dashboard_type)
         except KeyError:
             raise KeyError(f"No dashboard file found for: {name}")
 
     async def query_models(
-        self, sql_query: str, *, selections: dict[str, t.Any] = {},
+        self, sql_query: str, *, user: AbstractUser | None = None, selections: dict[str, t.Any] = {}, configurables: dict[str, str] = {}
    ) -> dr.DatasetResult:
-
+        if user is None:
+            user = self._guest_user
+
+        dag = await self._get_compiled_dag(user=user, sql_query=sql_query, selections=selections, configurables=configurables)
         await dag._run_models()
         assert isinstance(dag.target_model.result, pl.LazyFrame)
+        df = self._enforce_max_result_rows(dag.target_model.result, "query")
         return dr.DatasetResult(
             target_model_config=dag.target_model.model_config,
-            df=
+            df=df.with_row_index("_row_num", offset=1)
         )
+
+    async def get_compiled_model_query(
+        self, model_name: str, *, user: AbstractUser | None = None, selections: dict[str, t.Any] = {}, configurables: dict[str, str] = {}
+    ) -> rm.CompiledQueryModel:
+        """
+        Compile the specified data model and return its language and compiled definition.
+        """
+        if user is None:
+            user = self._guest_user
+
+        name = u.normalize_name(model_name)
+        models_dict = self._get_models_dict(always_python_df=False)
+        if name not in models_dict:
+            raise InvalidInputError(404, "model_not_found", f"No data model found with name: {model_name}")
+
+        model = models_dict[name]
+        # Only build, dbview, and federate models support runtime compiled definition in this context
+        if not isinstance(model, (m.BuildModel, m.DbviewModel, m.FederateModel)):
+            raise InvalidInputError(400, "unsupported_model_type", "Only build, dbview, and federate models currently support compiled definition via this endpoint")
+
+        # Build a DAG with this model as the target, without a dataset context
+        model.is_target = True
+        dag = m.DAG(None, model, models_dict, self._vdl_catalog_db_path, self._logger)
+
+        cfg = {**self._manifest_cfg.get_default_configurables(), **configurables}
+        await dag.execute(
+            self._param_args, self._param_cfg_set, self._context_func, user, selections, runquery=False, configurables=cfg
+        )
+
+        language = "sql" if isinstance(model.query_file, mq.SqlQueryFile) else "python"
+        if isinstance(model, m.BuildModel):
+            # Compile SQL build models; Python build models not yet supported
+            if isinstance(model.query_file, mq.SqlQueryFile):
+                static_models = self._get_static_models()
+                compiled = model._compile_sql_model(model.query_file, self._conn_args, static_models)
+                definition = compiled.query
+            else:
+                definition = "# Compiling Python build models is currently not supported. This will be available in a future version of Squirrels..."
+        elif isinstance(model.compiled_query, mq.SqlModelQuery):
+            definition = model.compiled_query.query
+        elif isinstance(model.compiled_query, mq.PyModelQuery):
+            definition = "# Compiling Python data models is currently not supported. This will be available in a future version of Squirrels..."
+        else:
+            raise NotImplementedError(f"Query type not supported: {model.compiled_query.__class__.__name__}")
+
+        return rm.CompiledQueryModel(language=language, definition=definition, placeholders=dag.placeholders)
```
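`query_models()` now accepts a user and configurables, and `get_compiled_model_query()` exposes a model's compiled definition. A sketch (import path assumed; model names are illustrative):

```python
import asyncio

from squirrels import SquirrelsProject  # assumed import path

async def main() -> None:
    with SquirrelsProject(project_path=".") as project:
        # Model references in the SQL are rewritten (e.g. to vdl.<model>)
        # before the query runs against DuckDB
        result = await project.query_models("SELECT COUNT(*) AS n FROM build_example")
        print(result.df)

        compiled = await project.get_compiled_model_query("federate_example")
        print(compiled.language)    # "sql" or "python"
        print(compiled.definition)  # compiled SQL, or a placeholder for Python models

asyncio.run(main())
```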