PyPI - squirrels - Versions diffs - 0.5.0b3__py3-none-any.whl → 0.6.0.post0__py3-none-any.whl - Mend

squirrels 0.5.0b3__py3-none-any.whl → 0.6.0.post0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (93) hide show

squirrels/__init__.py +4 -0
squirrels/_api_routes/__init__.py +5 -0
squirrels/_api_routes/auth.py +337 -0
squirrels/_api_routes/base.py +196 -0
squirrels/_api_routes/dashboards.py +156 -0
squirrels/_api_routes/data_management.py +148 -0
squirrels/_api_routes/datasets.py +220 -0
squirrels/_api_routes/project.py +289 -0
squirrels/_api_server.py +440 -792
squirrels/_arguments/__init__.py +0 -0
squirrels/_arguments/{_init_time_args.py → init_time_args.py} +23 -43
squirrels/_arguments/{_run_time_args.py → run_time_args.py} +32 -68
squirrels/_auth.py +590 -264
squirrels/_command_line.py +130 -58
squirrels/_compile_prompts.py +147 -0
squirrels/_connection_set.py +16 -15
squirrels/_constants.py +36 -11
squirrels/_dashboards.py +179 -0
squirrels/_data_sources.py +40 -34
squirrels/_dataset_types.py +16 -11
squirrels/_env_vars.py +209 -0
squirrels/_exceptions.py +9 -37
squirrels/_http_error_responses.py +52 -0
squirrels/_initializer.py +7 -6
squirrels/_logging.py +121 -0
squirrels/_manifest.py +155 -77
squirrels/_mcp_server.py +578 -0
squirrels/_model_builder.py +11 -55
squirrels/_model_configs.py +5 -5
squirrels/_model_queries.py +1 -1
squirrels/_models.py +276 -143
squirrels/_package_data/base_project/.env +1 -24
squirrels/_package_data/base_project/.env.example +31 -17
squirrels/_package_data/base_project/connections.yml +4 -3
squirrels/_package_data/base_project/dashboards/dashboard_example.py +13 -7
squirrels/_package_data/base_project/dashboards/dashboard_example.yml +6 -6
squirrels/_package_data/base_project/docker/Dockerfile +2 -2
squirrels/_package_data/base_project/docker/compose.yml +1 -1
squirrels/_package_data/base_project/duckdb_init.sql +1 -0
squirrels/_package_data/base_project/models/builds/build_example.py +2 -2
squirrels/_package_data/base_project/models/dbviews/dbview_example.sql +7 -2
squirrels/_package_data/base_project/models/dbviews/dbview_example.yml +16 -10
squirrels/_package_data/base_project/models/federates/federate_example.py +27 -17
squirrels/_package_data/base_project/models/federates/federate_example.sql +3 -7
squirrels/_package_data/base_project/models/federates/federate_example.yml +7 -7
squirrels/_package_data/base_project/models/sources.yml +5 -6
squirrels/_package_data/base_project/parameters.yml +24 -38
squirrels/_package_data/base_project/pyconfigs/connections.py +8 -3
squirrels/_package_data/base_project/pyconfigs/context.py +26 -14
squirrels/_package_data/base_project/pyconfigs/parameters.py +124 -81
squirrels/_package_data/base_project/pyconfigs/user.py +48 -15
squirrels/_package_data/base_project/resources/public/.gitkeep +0 -0
squirrels/_package_data/base_project/seeds/seed_categories.yml +1 -1
squirrels/_package_data/base_project/seeds/seed_subcategories.yml +1 -1
squirrels/_package_data/base_project/squirrels.yml.j2 +21 -31
squirrels/_package_data/templates/login_successful.html +53 -0
squirrels/_package_data/templates/squirrels_studio.html +22 -0
squirrels/_parameter_configs.py +43 -22
squirrels/_parameter_options.py +1 -1
squirrels/_parameter_sets.py +41 -30
squirrels/_parameters.py +560 -123
squirrels/_project.py +487 -277
squirrels/_py_module.py +71 -10
squirrels/_request_context.py +33 -0
squirrels/_schemas/__init__.py +0 -0
squirrels/_schemas/auth_models.py +83 -0
squirrels/_schemas/query_param_models.py +70 -0
squirrels/_schemas/request_models.py +26 -0
squirrels/_schemas/response_models.py +286 -0
squirrels/_seeds.py +52 -13
squirrels/_sources.py +29 -23
squirrels/_utils.py +221 -42
squirrels/_version.py +1 -3
squirrels/arguments.py +7 -2
squirrels/auth.py +4 -0
squirrels/connections.py +2 -0
squirrels/dashboards.py +3 -1
squirrels/data_sources.py +6 -0
squirrels/parameter_options.py +5 -0
squirrels/parameters.py +5 -0
squirrels/types.py +10 -3
squirrels-0.6.0.post0.dist-info/METADATA +148 -0
squirrels-0.6.0.post0.dist-info/RECORD +101 -0
{squirrels-0.5.0b3.dist-info → squirrels-0.6.0.post0.dist-info}/WHEEL +1 -1
squirrels/_api_response_models.py +0 -190
squirrels/_dashboard_types.py +0 -82
squirrels/_dashboards_io.py +0 -79
squirrels-0.5.0b3.dist-info/METADATA +0 -110
squirrels-0.5.0b3.dist-info/RECORD +0 -80
/squirrels/_package_data/base_project/{assets → resources}/expenses.db +0 -0
/squirrels/_package_data/base_project/{assets → resources}/weather.db +0 -0
{squirrels-0.5.0b3.dist-info → squirrels-0.6.0.post0.dist-info}/entry_points.txt +0 -0
{squirrels-0.5.0b3.dist-info → squirrels-0.6.0.post0.dist-info}/licenses/LICENSE +0 -0

squirrels/_seeds.py CHANGED Viewed

@@ -1,7 +1,15 @@
 from dataclasses import dataclass
-import os, time, glob, polars as pl, json
+import os
+import re
+import time
+import glob
+import json
+import polars as pl
+from ._exceptions import ConfigurationError
 from . import _utils as u, _constants as c, _model_configs as mc
+from ._env_vars import SquirrelsEnvVars
 @dataclass
@@ -13,21 +21,47 @@ class Seed:
         if self.config.cast_column_types:
             exprs = []
             for col_config in self.config.columns:
-                sqrl_dtype = "double" if col_config.type.lower().startswith("decimal") else col_config.type
-                polars_dtype = u.sqrl_dtypes_to_polars_dtypes.get(sqrl_dtype, pl.String)
+                col_type = col_config.type.lower()
+                if col_type.startswith("decimal"):
+                    polars_dtype = self._parse_decimal_type(col_type)
+                else:
+                    try:
+                        polars_dtype = u.sqrl_dtypes_to_polars_dtypes[col_type]
+                    except KeyError as e:
+                        raise ConfigurationError(f"Unknown column type: '{col_type}'") from e
                 exprs.append(pl.col(col_config.name).cast(polars_dtype))
             self.df = self.df.with_columns(*exprs)
+    @staticmethod
+    def _parse_decimal_type(col_type: str) -> pl.Decimal:
+        """Parse a decimal type string and return the appropriate polars Decimal type.
+        Supports formats: "decimal" or "decimal(precision, scale)"
+        """
+        # Match decimal(precision, scale) pattern
+        match = re.match(r"decimal\s*\(\s*(\d+)\s*,\s*(\d+)\s*\)", col_type)
+        if match:
+            precision = int(match.group(1))
+            scale = int(match.group(2))
+            return pl.Decimal(precision=precision, scale=scale)
+        if col_type == "decimal":
+            return pl.Decimal(precision=18, scale=2)
+        raise ConfigurationError(f"Unknown column type: '{col_type}'")
 @dataclass
 class Seeds:
     _data: dict[str, Seed]
     def run_query(self, sql_query: str) -> pl.DataFrame:
         dataframes = {key: seed.df for key, seed in self._data.items()}
         return u.run_sql_on_dataframes(sql_query, dataframes)
     def get_dataframes(self) -> dict[str, Seed]:
         return self._data.copy()
@@ -35,13 +69,14 @@ class Seeds:
 class SeedsIO:
     @classmethod
-    def load_files(cls, logger: u.Logger, base_path: str, env_vars: dict[str, str]) -> Seeds:
+    def load_files(cls, logger: u.Logger, env_vars: SquirrelsEnvVars) -> Seeds:
         start = time.time()
-        infer_schema_setting: bool = (env_vars.get(c.SQRL_SEEDS_INFER_SCHEMA, "true").lower() == "true")
-        na_values_setting: list[str] = json.loads(env_vars.get(c.SQRL_SEEDS_NA_VALUES, "[]"))
+        project_path = env_vars.project_path
+        infer_schema_setting: bool = env_vars.seeds_infer_schema
+        na_values_setting: list[str] = env_vars.seeds_na_values
         seeds_dict = {}
-        csv_files = glob.glob(os.path.join(base_path, c.SEEDS_FOLDER, '**/*.csv'), recursive=True)
+        csv_files = glob.glob(os.path.join(project_path, c.SEEDS_FOLDER, '**/*.csv'), recursive=True)
         for csv_file in csv_files:
             config_file = os.path.splitext(csv_file)[0] + '.yml'
             config_dict = u.load_yaml_config(config_file) if os.path.exists(config_file) else {}
@@ -49,10 +84,14 @@ class SeedsIO:
             file_stem = os.path.splitext(os.path.basename(csv_file))[0]
             infer_schema = not config.cast_column_types and infer_schema_setting
-            df = pl.read_csv(csv_file, try_parse_dates=True, infer_schema=infer_schema, null_values=na_values_setting).lazy()
+            df = pl.read_csv(
+                csv_file, try_parse_dates=True,
+                infer_schema=infer_schema,
+                null_values=na_values_setting
+            ).lazy()
             seeds_dict[file_stem] = Seed(config, df)
         seeds = Seeds(seeds_dict)
         logger.log_activity_time("loading seed files", start)
         return seeds

squirrels/_sources.py CHANGED Viewed

@@ -1,19 +1,20 @@
 from typing import Any
 from pydantic import BaseModel, Field, model_validator
-import time, sqlglot
+import time, yaml
 from . import _utils as u, _constants as c, _model_configs as mc
+from ._env_vars import SquirrelsEnvVars
 class UpdateHints(BaseModel):
     increasing_column: str | None = Field(default=None)
-    strictly_increasing: bool = Field(default=True, description="Delete the max value of the increasing column, ignored if value is set")
-    selective_overwrite_value: Any = Field(default=None)
+    strictly_increasing: bool = Field(default=True, description="Delete the max value of the increasing column, ignored if selective_overwrite_value is set")
+    selective_overwrite_value: Any = Field(default=None, description="Delete all values of the increasing column greater than or equal to this value")
 class Source(mc.ConnectionInterface, mc.ModelConfig):
     table: str | None = Field(default=None)
-    load_to_duckdb: bool = Field(default=False, description="Whether to load the data to DuckDB")
+    load_to_vdl: bool = Field(default=False, description="Whether to load the data to the 'virtual data lake' (VDL)")
     primary_key: list[str] = Field(default_factory=list)
     update_hints: UpdateHints = Field(default_factory=UpdateHints)
@@ -28,34 +29,28 @@ class Source(mc.ConnectionInterface, mc.ModelConfig):
     def get_cols_for_create_table_stmt(self) -> str:
         cols_clause = ", ".join([f"{col.name} {col.type}" for col in self.columns])
-        primary_key_clause = f", PRIMARY KEY ({', '.join(self.primary_key)})" if self.primary_key else ""
-        return f"{cols_clause}{primary_key_clause}"
-    def get_cols_for_insert_stmt(self) -> str:
-        return ", ".join([col.name for col in self.columns])
+        return cols_clause
     def get_max_incr_col_query(self, source_name: str) -> str:
         return f"SELECT max({self.update_hints.increasing_column}) FROM {source_name}"
-    def get_query_for_insert(self, dialect: str, conn_name: str, table_name: str, max_value_of_increasing_col: Any | None, *, full_refresh: bool = True) -> str:
-        select_cols = self.get_cols_for_insert_stmt()
+    def get_query_for_upsert(self, dialect: str, conn_name: str, table_name: str, max_value_of_increasing_col: Any | None, *, full_refresh: bool = True) -> str:
+        select_cols = ", ".join([col.name for col in self.columns])
         if full_refresh or max_value_of_increasing_col is None:
             return f"SELECT {select_cols} FROM db_{conn_name}.{table_name}"
         increasing_col = self.update_hints.increasing_column
         increasing_col_type = next(col.type for col in self.columns if col.name == increasing_col)
         where_cond = f"{increasing_col}::{increasing_col_type} > '{max_value_of_increasing_col}'::{increasing_col_type}"
-        pushdown_query = f"SELECT {select_cols} FROM {table_name} WHERE {where_cond}"
-        if dialect in ['postgres', 'mysql']:
-            transpiled_query = sqlglot.transpile(pushdown_query, read='duckdb', write=dialect)[0].replace("'", "''")
-            return f"FROM {dialect}_query('db_{conn_name}', '{transpiled_query}')"
+        # TODO: figure out if using pushdown query is worth it
+        # if dialect in ['postgres', 'mysql']:
+        #     pushdown_query = f"SELECT {select_cols} FROM {table_name} WHERE {where_cond}"
+        #     transpiled_query = sqlglot.transpile(pushdown_query, read='duckdb', write=dialect)[0].replace("'", "''")
+        #     return f"FROM {dialect}_query('db_{conn_name}', '{transpiled_query}')"
         return f"SELECT {select_cols} FROM db_{conn_name}.{table_name} WHERE {where_cond}"
-    def get_insert_replace_clause(self) -> str:
-        return "" if len(self.primary_key) == 0 else "OR REPLACE"
 class Sources(BaseModel):
     sources: dict[str, Source] = Field(default_factory=dict)
@@ -85,20 +80,31 @@ class Sources(BaseModel):
                     raise u.ConfigurationError(f"Column '{col.name}' in source '{source_name}' must have a type specified")
         return self
-    def finalize_null_fields(self, env_vars: dict[str, str]):
+    def finalize_null_fields(self, env_vars: SquirrelsEnvVars):
+        default_conn_name = env_vars.connections_default_name_used
         for source_name, source in self.sources.items():
-            source.finalize_connection(env_vars)
+            source.finalize_connection(default_conn_name=default_conn_name)
             source.finalize_table(source_name)
         return self
 class SourcesIO:
     @classmethod
-    def load_file(cls, logger: u.Logger, base_path: str, env_vars: dict[str, str]) -> Sources:
+    def load_file(cls, logger: u.Logger, env_vars: SquirrelsEnvVars, env_vars_unformatted: dict[str, str]) -> Sources:
         start = time.time()
-        sources_path = u.Path(base_path, c.MODELS_FOLDER, c.SOURCES_FILE)
-        sources_data = u.load_yaml_config(sources_path) if sources_path.exists() else {}
+        sources_path = u.Path(env_vars.project_path, c.MODELS_FOLDER, c.SOURCES_FILE)
+        if sources_path.exists():
+            raw_content = u.read_file(sources_path)
+            rendered = u.render_string(raw_content, project_path=env_vars.project_path, env_vars=env_vars_unformatted)
+            sources_data = yaml.safe_load(rendered) or {}
+        else:
+            sources_data = {}
+        if not isinstance(sources_data, dict):
+            raise u.ConfigurationError(
+                f"Parsed content from YAML file must be a dictionary. Got: {sources_data}"
+            )
         sources = Sources(**sources_data).finalize_null_fields(env_vars)

squirrels/_utils.py CHANGED Viewed

@@ -1,18 +1,16 @@
-from typing import Sequence, Optional, Union, TypeVar, Callable, Any, Iterable
+from typing import Sequence, Optional, Union, TypeVar, Callable, Iterable, Literal, Any
 from datetime import datetime
 from pathlib import Path
-from functools import lru_cache
-from pydantic import BaseModel
 import os, time, logging, json, duckdb, polars as pl, yaml
 import jinja2 as j2, jinja2.nodes as j2_nodes
-import sqlglot, sqlglot.expressions, asyncio
+import sqlglot, sqlglot.expressions, asyncio, hashlib, inspect, base64
 from . import _constants as c
 from ._exceptions import ConfigurationError
 FilePath = Union[str, Path]
-# Polars
+# Polars <-> Squirrels dtypes mappings (except Decimal)
 polars_dtypes_to_sqrl_dtypes: dict[type[pl.DataType], list[str]] = {
     pl.String: ["string", "varchar", "char", "text"],
     pl.Int8: ["tinyint", "int1"],
@@ -20,7 +18,7 @@ polars_dtypes_to_sqrl_dtypes: dict[type[pl.DataType], list[str]] = {
     pl.Int32: ["integer", "int", "int4"],
     pl.Int64: ["bigint", "long", "int8"],
     pl.Float32: ["float", "float4", "real"],
-    pl.Float64: ["double", "float8", "decimal"], # Note: Polars Decimal type is considered unstable, so we use Float64 for "decimal"
+    pl.Float64: ["double", "float8"],
     pl.Boolean: ["boolean", "bool", "logical"],
     pl.Date: ["date"],
     pl.Time: ["time"],
@@ -29,18 +27,28 @@ polars_dtypes_to_sqrl_dtypes: dict[type[pl.DataType], list[str]] = {
     pl.Binary: ["blob", "binary", "varbinary"]
 }
-sqrl_dtypes_to_polars_dtypes: dict[str, type[pl.DataType]] = {sqrl_type: k for k, v in polars_dtypes_to_sqrl_dtypes.items() for sqrl_type in v}
+sqrl_dtypes_to_polars_dtypes: dict[str, type[pl.DataType]] = {
+    sqrl_type: k for k, v in polars_dtypes_to_sqrl_dtypes.items() for sqrl_type in v
+}
 ## Other utility classes
 class Logger(logging.Logger):
-    def log_activity_time(self, activity: str, start_timestamp: float, *, request_id: str | None = None) -> None:
+    def info(self, msg: str, *, data: dict[str, Any] = {}, **kwargs) -> None:
+        super().info(msg, extra={"data": data}, **kwargs)
+    def log_activity_time(self, activity: str, start_timestamp: float, *, additional_data: dict[str, Any] = {}) -> None:
         end_timestamp = time.time()
         time_taken = round((end_timestamp-start_timestamp) * 10**3, 3)
-        data = { "activity": activity, "start_timestamp": start_timestamp, "end_timestamp": end_timestamp, "time_taken_ms": time_taken }
-        info = { "request_id": request_id } if request_id else {}
-        self.info(f'Time taken for "{activity}": {time_taken}ms', extra={"data": data, "info": info})
+        data = {
+            "activity": activity,
+            "start_timestamp": start_timestamp,
+            "end_timestamp": end_timestamp,
+            "time_taken_ms": time_taken,
+            **additional_data
+        }
+        self.info(f'Time taken for "{activity}": {time_taken}ms', data=data)
 class EnvironmentWithMacros(j2.Environment):
@@ -85,15 +93,7 @@ class EnvironmentWithMacros(j2.Environment):
 ## Utility functions/variables
-def log_activity_time(logger: logging.Logger, activity: str, start_timestamp: float, *, request_id: str | None = None) -> None:
-    end_timestamp = time.time()
-    time_taken = round((end_timestamp-start_timestamp) * 10**3, 3)
-    data = { "activity": activity, "start_timestamp": start_timestamp, "end_timestamp": end_timestamp, "time_taken_ms": time_taken }
-    info = { "request_id": request_id } if request_id else {}
-    logger.debug(f'Time taken for "{activity}": {time_taken}ms', extra={"data": data, "info": info})
-def render_string(raw_str: str, *, base_path: str = ".", **kwargs) -> str:
+def render_string(raw_str: str, *, project_path: str = ".", **kwargs) -> str:
     """
     Given a template string, render it with the given keyword arguments
@@ -104,7 +104,7 @@ def render_string(raw_str: str, *, base_path: str = ".", **kwargs) -> str:
     Returns:
         The rendered string
     """
-    j2_env = j2.Environment(loader=j2.FileSystemLoader(base_path))
+    j2_env = j2.Environment(loader=j2.FileSystemLoader(project_path))
     template = j2_env.from_string(raw_str)
     return template.render(kwargs)
@@ -128,7 +128,7 @@ def read_file(filepath: FilePath) -> str:
 def normalize_name(name: str) -> str:
     """
-    Normalizes names to the convention of the squirrels manifest file.
+    Normalizes names to the convention of the squirrels manifest file (with underscores instead of dashes).
     Arguments:
         name: The name to normalize.
@@ -141,7 +141,7 @@ def normalize_name(name: str) -> str:
 def normalize_name_for_api(name: str) -> str:
     """
-    Normalizes names to the REST API convention.
+    Normalizes names to the REST API convention (with dashes instead of underscores).
     Arguments:
         name: The name to normalize.
@@ -196,8 +196,10 @@ def process_if_not_none(input_val: Optional[X], processor: Callable[[X], Y]) ->
     return processor(input_val)
-@lru_cache(maxsize=1)
-def _read_duckdb_init_sql() -> tuple[str, Path | None]:
+def _read_duckdb_init_sql(
+    *,
+    datalake_db_path: str | None = None,
+) -> str:
     """
     Reads and caches the duckdb init file content.
     Returns None if file doesn't exist or is empty.
@@ -212,35 +214,38 @@ def _read_duckdb_init_sql() -> tuple[str, Path | None]:
         if Path(c.DUCKDB_INIT_FILE).exists():
             with open(c.DUCKDB_INIT_FILE, 'r') as f:
                 init_contents.append(f.read())
-        init_sql = "\n".join(init_contents).strip()
-        target_init_path = None
-        if init_sql:
-            target_init_path = Path(c.TARGET_FOLDER, c.DUCKDB_INIT_FILE)
-            target_init_path.parent.mkdir(parents=True, exist_ok=True)
-            target_init_path.write_text(init_sql)
-        return init_sql, target_init_path
+        if datalake_db_path:
+            attach_stmt = f"ATTACH '{datalake_db_path}' AS vdl (READ_ONLY);"
+            init_contents.append(attach_stmt)
+            use_stmt = f"USE vdl;"
+            init_contents.append(use_stmt)
+        init_sql = "\n\n".join(init_contents).strip()
+        return init_sql
     except Exception as e:
         raise ConfigurationError(f"Failed to read {c.DUCKDB_INIT_FILE}: {str(e)}") from e
-def create_duckdb_connection(filepath: str | Path = ":memory:", *, read_only: bool = False) -> duckdb.DuckDBPyConnection:
+def create_duckdb_connection(
+    db_path: str | Path = ":memory:",
+    *,
+    datalake_db_path: str | None = None
+) -> duckdb.DuckDBPyConnection:
     """
     Creates a DuckDB connection and initializes it with statements from duckdb init file
     Arguments:
         filepath: Path to the DuckDB database file. Defaults to in-memory database.
-        read_only: Whether to open the database in read-only mode. Defaults to False.
+        datalake_db_path: The path to the VDL catalog database if applicable. If exists, this is attached as 'vdl' (READ_ONLY). Default is None.
     Returns:
         A DuckDB connection (which must be closed after use)
     """
-    conn = duckdb.connect(filepath, read_only=read_only)
+    conn = duckdb.connect(db_path)
     try:
-        init_sql, _ = _read_duckdb_init_sql()
-        if init_sql:
-            conn.execute(init_sql)
+        init_sql = _read_duckdb_init_sql(datalake_db_path=datalake_db_path)
+        conn.execute(init_sql)
     except Exception as e:
         conn.close()
         raise ConfigurationError(f"Failed to execute {c.DUCKDB_INIT_FILE}: {str(e)}") from e
@@ -272,6 +277,114 @@ def run_sql_on_dataframes(sql_query: str, dataframes: dict[str, pl.LazyFrame]) -
     return result_df
+async def run_polars_sql_on_dataframes(
+    sql_query: str, dataframes: dict[str, pl.LazyFrame], *, timeout_seconds: float = 2.0, max_rows: int | None = None
+) -> pl.DataFrame:
+    """
+    Runs a SQL query against a collection of dataframes using Polars SQL (more secure than DuckDB for user input).
+    Arguments:
+        sql_query: The SQL query to run (Polars SQL dialect)
+        dataframes: A dictionary of table names to their polars LazyFrame
+        timeout_seconds: Maximum execution time in seconds (default 2.0)
+        max_rows: Maximum number of rows to collect. Collects at most max_rows + 1 rows
+                  to allow overflow detection without loading unbounded results into memory.
+    Returns:
+        The result as a polars DataFrame from running the query (limited to max_rows + 1)
+    Raises:
+        ConfigurationError: If the query is invalid or insecure
+    """
+    # Validate the SQL query
+    _validate_sql_query_security(sql_query, dataframes)
+    # Execute with timeout
+    try:
+        loop = asyncio.get_event_loop()
+        result = await asyncio.wait_for(
+            loop.run_in_executor(None, _run_polars_sql_sync, sql_query, dataframes, max_rows),
+            timeout=timeout_seconds
+        )
+        return result
+    except asyncio.TimeoutError as e:
+        raise ConfigurationError(f"SQL query execution exceeded timeout of {timeout_seconds} seconds") from e
+def _run_polars_sql_sync(sql_query: str, dataframes: dict[str, pl.LazyFrame], max_rows: int | None) -> pl.DataFrame:
+    """
+    Synchronous execution of Polars SQL.
+    Arguments:
+        sql_query: The SQL query to run
+        dataframes: A dictionary of table names to their polars LazyFrame
+        max_rows: Maximum number of rows to collect.
+    """
+    ctx = pl.SQLContext(**dataframes)
+    result = ctx.execute(sql_query, eager=False)
+    if max_rows is not None:
+        result = result.limit(max_rows)
+    return result.collect()
+def _validate_sql_query_security(sql_query: str, dataframes: dict[str, pl.LazyFrame]) -> None:
+    """
+    Validates that a SQL query is safe to execute.
+    Enforces:
+    - Single statement only
+    - Read-only operations (SELECT/WITH/UNION)
+    - Table references limited to registered frames (excluding CTE names)
+    Arguments:
+        sql_query: The SQL query to validate
+        dataframes: Dictionary of allowed table names
+    Raises:
+        ConfigurationError: If validation fails
+    """
+    try:
+        parsed = sqlglot.parse(sql_query)
+    except Exception as e:
+        raise ConfigurationError(f"Failed to parse SQL query: {str(e)}") from e
+    # Enforce single statement
+    if len(parsed) != 1:
+        raise ConfigurationError(f"Only single SQL statements are allowed. Found {len(parsed)} statements.")
+    statement = parsed[0]
+    # Enforce read-only: allow SELECT, WITH (CTE), UNION, INTERSECT, EXCEPT
+    allowed_types = (
+        sqlglot.expressions.Select,
+        sqlglot.expressions.Union,
+        sqlglot.expressions.Intersect,
+        sqlglot.expressions.Except,
+    )
+    if not isinstance(statement, allowed_types):
+        raise ConfigurationError(
+            f"Only read-only SQL statements (SELECT, WITH, UNION, INTERSECT, EXCEPT) are allowed. "
+            f"Found: {type(statement).__name__}"
+        )
+    # Collect CTE names (these are temporary tables created by WITH clauses)
+    cte_names: set[str] = set()
+    for cte in statement.find_all(sqlglot.expressions.CTE):
+        if cte.alias:
+            cte_names.add(cte.alias)
+    # Validate table references (excluding CTE names)
+    allowed_tables = set(dataframes.keys()) | cte_names
+    for table in statement.find_all(sqlglot.expressions.Table):
+        table_name = table.name
+        if table_name not in allowed_tables:
+            raise ConfigurationError(
+                f"Table reference '{table_name}' is not allowed. "
+                f"Only the following tables are available: {sorted(dataframes.keys())}"
+            )
 def load_yaml_config(filepath: FilePath) -> dict:
     """
     Loads a YAML config file
@@ -284,7 +397,13 @@ def load_yaml_config(filepath: FilePath) -> dict:
     """
     try:
         with open(filepath, 'r') as f:
-            return yaml.safe_load(f)
+            content = yaml.safe_load(f)
+            content = content if content else {}
+        if not isinstance(content, dict):
+            raise yaml.YAMLError(f"Parsed content from YAML file must be a dictionary. Got: {content}")
+        return content
     except yaml.YAMLError as e:
         raise ConfigurationError(f"Failed to parse yaml file: {filepath}") from e
@@ -308,7 +427,7 @@ def run_duckdb_stmt(
         redacted_stmt = redacted_stmt.replace(value, "[REDACTED]")
     for_model_name = f" for model '{model_name}'" if model_name is not None else ""
-    logger.info(f"Running SQL statement{for_model_name}:\n{redacted_stmt}", extra={"data": {"params": params}})
+    logger.debug(f"Running SQL statement{for_model_name}:\n{redacted_stmt}")
     try:
         return duckdb_conn.execute(stmt, params)
     except duckdb.ParserException as e:
@@ -359,3 +478,63 @@ async def asyncio_gather(coroutines: list):
         # Wait for tasks to be cancelled
         await asyncio.gather(*tasks, return_exceptions=True)
         raise
+def hash_string(input_str: str, salt: str) -> str:
+    """
+    Hashes a string using SHA-256
+    """
+    return hashlib.sha256((input_str + salt).encode()).hexdigest()
+T = TypeVar('T')
+def call_func(func: Callable[..., T], **kwargs) -> T:
+    """
+    Calls a function with the given arguments if func expects arguments, otherwise calls func without arguments
+    """
+    sig = inspect.signature(func)
+    # Filter kwargs to only include parameters that the function accepts
+    filtered_kwargs = {k: v for k, v in kwargs.items() if k in sig.parameters}
+    return func(**filtered_kwargs)
+def generate_pkce_challenge(code_verifier: str) -> str:
+    """Generate PKCE code challenge from code verifier"""
+    # Generate SHA256 hash of code_verifier
+    verifier_hash = hashlib.sha256(code_verifier.encode('utf-8')).digest()
+    # Base64 URL encode (without padding)
+    expected_challenge = base64.urlsafe_b64encode(verifier_hash).decode('utf-8').rstrip('=')
+    return expected_challenge
+def to_title_case(input_str: str) -> str:
+    """Convert a string to title case"""
+    spaced_str = input_str.replace('_', ' ').replace('-', ' ')
+    return spaced_str.title()
+def to_bool(val: object) -> bool:
+    """Convert common truthy/falsey representations to a boolean.
+    Accepted truthy values (case-insensitive): "1", "true", "t", "yes", "y", "on".
+    All other values are considered falsey. None is falsey.
+    """
+    if isinstance(val, bool):
+        return val
+    if val is None:
+        return False
+    s = str(val).strip().lower()
+    return s in ("1", "true", "t", "yes", "y", "on")
+ACCESS_LEVEL = Literal["admin", "member", "guest"]
+def get_access_level_rank(access_level: ACCESS_LEVEL) -> int:
+    """Get the rank of an access level. Lower ranks have more privileges."""
+    return { "admin": 1, "member": 2, "guest": 3 }.get(access_level.lower(), 1)
+def user_has_elevated_privileges(user_access_level: ACCESS_LEVEL, required_access_level: ACCESS_LEVEL) -> bool:
+    """Check if a user has privilege to access a resource"""
+    user_access_level_rank = get_access_level_rank(user_access_level)
+    required_access_level_rank = get_access_level_rank(required_access_level)
+    return user_access_level_rank <= required_access_level_rank

squirrels/_version.py CHANGED Viewed

@@ -1,3 +1 @@
-__version__ = '0.5.0'
-sq_major_version, sq_minor_version, sq_patch_version = __version__.split('.')[:3]
+__version__ = '0.6.0'

squirrels/arguments.py CHANGED Viewed

@@ -1,2 +1,7 @@
-from ._arguments._init_time_args import ConnectionsArgs, ParametersArgs, BuildModelArgs
-from ._arguments._run_time_args import ContextArgs, ModelArgs, DashboardArgs
+from ._arguments.init_time_args import ConnectionsArgs, AuthProviderArgs, ParametersArgs, BuildModelArgs
+from ._arguments.run_time_args import ContextArgs, ModelArgs, DashboardArgs
+__all__ = [
+    "ConnectionsArgs", "AuthProviderArgs", "ParametersArgs", "BuildModelArgs",
+    "ContextArgs", "ModelArgs", "DashboardArgs"
+]

squirrels/auth.py ADDED Viewed

@@ -0,0 +1,4 @@
+from ._schemas.auth_models import CustomUserFields, RegisteredUser
+from ._auth import ProviderConfigs, provider
+__all__ = ["CustomUserFields", "RegisteredUser", "ProviderConfigs", "provider"]

squirrels/connections.py CHANGED Viewed

@@ -1 +1,3 @@
 from ._manifest import ConnectionProperties, ConnectionTypeEnum
+__all__ = ["ConnectionProperties", "ConnectionTypeEnum"]

squirrels/dashboards.py CHANGED Viewed

@@ -1 +1,3 @@
-from ._dashboard_types import PngDashboard, HtmlDashboard
+from ._dashboards import PngDashboard, HtmlDashboard
+__all__ = ["PngDashboard", "HtmlDashboard"]

squirrels/data_sources.py CHANGED Viewed

@@ -1,4 +1,5 @@
 from ._data_sources import (
+    SourceEnum,
     SelectDataSource,
     DateDataSource,
     DateRangeDataSource,
@@ -6,3 +7,8 @@ from ._data_sources import (
     NumberRangeDataSource,
     TextDataSource
 )
+__all__ = [
+    "SourceEnum", "SelectDataSource", "DateDataSource", "DateRangeDataSource",
+    "NumberDataSource", "NumberRangeDataSource", "TextDataSource"
+]

squirrels/parameter_options.py CHANGED Viewed

@@ -6,3 +6,8 @@ from ._parameter_options import (
     NumberRangeParameterOption,
     TextParameterOption
 )
+__all__ = [
+    "SelectParameterOption", "DateParameterOption", "DateRangeParameterOption",
+    "NumberParameterOption", "NumberRangeParameterOption", "TextParameterOption"
+]

squirrels/parameters.py CHANGED Viewed

@@ -7,3 +7,8 @@ from ._parameters import (
     NumberRangeParameter,
     TextParameter
 )
+__all__ = [
+    "SingleSelectParameter", "MultiSelectParameter", "DateParameter", "DateRangeParameter",
+    "NumberParameter", "NumberRangeParameter", "TextParameter"
+]

squirrels 0.5.0b3__py3-none-any.whl → 0.6.0.post0__py3-none-any.whl

squirrels 0.5.0b3__py3-none-any.whl → 0.6.0.post0__py3-none-any.whl