squirrels 0.4.1__py3-none-any.whl → 0.5.0rc0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of squirrels might be problematic. Click here for more details.
- squirrels/__init__.py +10 -6
- squirrels/_api_response_models.py +93 -44
- squirrels/_api_server.py +571 -219
- squirrels/_auth.py +451 -0
- squirrels/_command_line.py +61 -20
- squirrels/_connection_set.py +38 -25
- squirrels/_constants.py +44 -34
- squirrels/_dashboards_io.py +34 -16
- squirrels/_exceptions.py +57 -0
- squirrels/_initializer.py +117 -44
- squirrels/_manifest.py +124 -62
- squirrels/_model_builder.py +111 -0
- squirrels/_model_configs.py +74 -0
- squirrels/_model_queries.py +52 -0
- squirrels/_models.py +860 -354
- squirrels/_package_loader.py +8 -4
- squirrels/_parameter_configs.py +45 -65
- squirrels/_parameter_sets.py +15 -13
- squirrels/_project.py +561 -0
- squirrels/_py_module.py +4 -3
- squirrels/_seeds.py +35 -16
- squirrels/_sources.py +106 -0
- squirrels/_utils.py +166 -63
- squirrels/_version.py +1 -1
- squirrels/arguments/init_time_args.py +78 -15
- squirrels/arguments/run_time_args.py +62 -101
- squirrels/dashboards.py +4 -4
- squirrels/data_sources.py +94 -162
- squirrels/dataset_result.py +86 -0
- squirrels/dateutils.py +4 -4
- squirrels/package_data/base_project/.env +30 -0
- squirrels/package_data/base_project/.env.example +30 -0
- squirrels/package_data/base_project/.gitignore +3 -2
- squirrels/package_data/base_project/assets/expenses.db +0 -0
- squirrels/package_data/base_project/connections.yml +11 -3
- squirrels/package_data/base_project/dashboards/dashboard_example.py +15 -13
- squirrels/package_data/base_project/dashboards/dashboard_example.yml +22 -0
- squirrels/package_data/base_project/docker/.dockerignore +5 -2
- squirrels/package_data/base_project/docker/Dockerfile +3 -3
- squirrels/package_data/base_project/docker/compose.yml +1 -1
- squirrels/package_data/base_project/duckdb_init.sql +9 -0
- squirrels/package_data/base_project/macros/macros_example.sql +15 -0
- squirrels/package_data/base_project/models/builds/build_example.py +26 -0
- squirrels/package_data/base_project/models/builds/build_example.sql +16 -0
- squirrels/package_data/base_project/models/builds/build_example.yml +55 -0
- squirrels/package_data/base_project/models/dbviews/dbview_example.sql +12 -22
- squirrels/package_data/base_project/models/dbviews/dbview_example.yml +26 -0
- squirrels/package_data/base_project/models/federates/federate_example.py +38 -15
- squirrels/package_data/base_project/models/federates/federate_example.sql +16 -2
- squirrels/package_data/base_project/models/federates/federate_example.yml +65 -0
- squirrels/package_data/base_project/models/sources.yml +39 -0
- squirrels/package_data/base_project/parameters.yml +36 -21
- squirrels/package_data/base_project/pyconfigs/connections.py +6 -11
- squirrels/package_data/base_project/pyconfigs/context.py +20 -33
- squirrels/package_data/base_project/pyconfigs/parameters.py +19 -21
- squirrels/package_data/base_project/pyconfigs/user.py +23 -0
- squirrels/package_data/base_project/seeds/seed_categories.yml +15 -0
- squirrels/package_data/base_project/seeds/seed_subcategories.csv +15 -15
- squirrels/package_data/base_project/seeds/seed_subcategories.yml +21 -0
- squirrels/package_data/base_project/squirrels.yml.j2 +17 -40
- squirrels/parameters.py +20 -20
- {squirrels-0.4.1.dist-info → squirrels-0.5.0rc0.dist-info}/METADATA +31 -32
- squirrels-0.5.0rc0.dist-info/RECORD +70 -0
- {squirrels-0.4.1.dist-info → squirrels-0.5.0rc0.dist-info}/WHEEL +1 -1
- squirrels-0.5.0rc0.dist-info/entry_points.txt +3 -0
- {squirrels-0.4.1.dist-info → squirrels-0.5.0rc0.dist-info/licenses}/LICENSE +1 -1
- squirrels/_authenticator.py +0 -85
- squirrels/_environcfg.py +0 -84
- squirrels/package_data/assets/favicon.ico +0 -0
- squirrels/package_data/assets/index.css +0 -1
- squirrels/package_data/assets/index.js +0 -58
- squirrels/package_data/base_project/dashboards.yml +0 -10
- squirrels/package_data/base_project/env.yml +0 -29
- squirrels/package_data/base_project/models/dbviews/dbview_example.py +0 -47
- squirrels/package_data/base_project/pyconfigs/auth.py +0 -45
- squirrels/package_data/templates/index.html +0 -18
- squirrels/project.py +0 -378
- squirrels/user_base.py +0 -55
- squirrels-0.4.1.dist-info/RECORD +0 -60
- squirrels-0.4.1.dist-info/entry_points.txt +0 -4
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
from dataclasses import dataclass, field
|
|
2
|
+
import asyncio, shutil, duckdb, time
|
|
3
|
+
|
|
4
|
+
from . import _utils as u, _connection_set as cs, _models as m
|
|
5
|
+
from ._exceptions import InvalidInputError
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@dataclass
class ModelBuilder:
    """
    Builds the static models into a DuckDB file (the "virtual data environment").

    The work is done on a development copy ("<path>.dev") that is guarded by a lock file
    ("<path>.dev.lock"). Once the models are built, the development copy is renamed to the
    target path, or to the staged path ("<path>.stg") when staging is requested.
    """
    _duckdb_venv_path: str
    _conn_set: cs.ConnectionSet
    _static_models: dict[str, m.StaticModel]
    _conn_args: cs.ConnectionsArgs = field(default_factory=lambda: cs.ConnectionsArgs(".", {}, {}))
    _logger: u.Logger = field(default_factory=lambda: u.Logger(""))

    def _attach_connections(self, duckdb_conn: duckdb.DuckDBPyConnection) -> dict[str, str]:
        """
        Attach every supported connection to DuckDB as a read-only database named "db_<connection name>".

        Returns the dialect of each connection that was attached, keyed by connection name.
        """
        dialect_by_conn_name: dict[str, str] = {}
        for conn_name, conn_props in self._conn_set.get_connections_as_dict().items():
            if not isinstance(conn_props, m.ConnectionProperties):
                continue
            dialect = conn_props.dialect
            attach_uri = conn_props.attach_uri_for_duckdb
            if attach_uri is None:
                continue # skip unsupported dialects
            attach_stmt = f"ATTACH IF NOT EXISTS '{attach_uri}' AS db_{conn_name} (TYPE {dialect}, READ_ONLY)"
            # The URI is passed as a redacted value (presumably so it is kept out of the logs - confirm in run_duckdb_stmt)
            u.run_duckdb_stmt(self._logger, duckdb_conn, attach_stmt, redacted_values=[attach_uri])
            dialect_by_conn_name[conn_name] = dialect
        return dialect_by_conn_name

    async def _build_models(self, duckdb_conn: duckdb.DuckDBPyConnection, select: str | None, full_refresh: bool) -> None:
        """
        Compile and construct the build models as DuckDB tables.

        When "select" is None all static models are compiled and the build starts from their terminal nodes.
        Otherwise only the selected model is compiled and built (a KeyError is raised if the name is unknown).
        """
        # Compile the build models
        models_list = self._static_models.values() if select is None else [self._static_models[select]]
        for model in models_list:
            model.compile_for_build(self._conn_args, self._static_models)

        # Find all terminal nodes
        terminal_nodes = set()
        if select is None:
            for model in models_list:
                terminal_nodes.update(model.get_terminal_nodes_for_build(set()))
            # Reset the flag only after every model has been traversed
            for model in models_list:
                model.confirmed_no_cycles = False
        else:
            terminal_nodes.add(select)

        # Run the build models
        coroutines = [
            self._static_models[model_name].build_model(duckdb_conn, full_refresh)
            for model_name in terminal_nodes
        ]
        await u.asyncio_gather(coroutines)

    async def build(self, full_refresh: bool, select: str | None, stage_file: bool) -> None:
        """
        Build the virtual data environment.

        Arguments:
            full_refresh: If False, the build starts from a copy of the staged file (if present) or of the existing target file
            select: Name of the single model to build, or None to build all models
            stage_file: If True, the result is written to "<path>.stg" instead of replacing the target file

        Raises:
            InvalidInputError: If the lock file of another build already exists
        """
        start = time.time()

        # Create target folder if it doesn't exist
        duckdb_path = u.Path(self._duckdb_venv_path)
        duckdb_path.parent.mkdir(parents=True, exist_ok=True)

        # Paths of the development copy (where the build happens) and of the staged copy
        # NOTE(review): nothing here deletes a leftover ".dev" file when full_refresh is True - confirm whether that is intended
        duckdb_dev_path = u.Path(self._duckdb_venv_path + ".dev")
        duckdb_stg_path = u.Path(self._duckdb_venv_path + ".stg")

        # If the development copy is already in use, a concurrent build is not allowed.
        # The lock is checked and claimed in one step (assumes u.Path behaves like pathlib.Path - TODO confirm), so
        # two builds starting at the same time cannot both pass the check.
        duckdb_dev_lock_path = u.Path(self._duckdb_venv_path + ".dev.lock")
        try:
            duckdb_dev_lock_path.touch(exist_ok=False)
        except FileExistsError:
            raise InvalidInputError(60, "An existing build process is already running and a concurrent build is not allowed") from None

        # Ensure the lock file is deleted even if an exception is raised
        try:
            # If not full refresh, create a development copy of the existing virtual data environment
            if not full_refresh:
                if duckdb_stg_path.exists():
                    duckdb_stg_path.replace(duckdb_dev_path)
                elif duckdb_path.exists():
                    shutil.copy(duckdb_path, duckdb_dev_path)

            self._logger.log_activity_time("creating development copy of virtual data environment", start)

            # Connect to DuckDB file
            duckdb_conn = u.create_duckdb_connection(duckdb_dev_path)

        except Exception:
            duckdb_dev_lock_path.unlink()
            raise

        # Sometimes code after conn.close() doesn't run (as if the python process is killed but no error is raised)
        # Using a new try block to ensure the lock file is removed before closing the connection
        try:
            # Attach connections
            self._attach_connections(duckdb_conn)

            # Construct build models
            await self._build_models(duckdb_conn, select, full_refresh)

        finally:
            duckdb_dev_lock_path.unlink()
            duckdb_conn.close()

        # Rename duckdb_dev_path to duckdb_path (or duckdb_stg_path if stage_file is True)
        if stage_file:
            duckdb_dev_path.replace(duckdb_stg_path)
        else:
            duckdb_dev_path.replace(duckdb_path)

        self._logger.log_activity_time("TOTAL TIME to build virtual data environment", start)
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
from enum import Enum
|
|
2
|
+
from pydantic import BaseModel, Field
|
|
3
|
+
|
|
4
|
+
from . import _constants as c
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class ColumnCategory(Enum):
    """
    The category that a model column can be assigned to.
    """
    DIMENSION = "dimension"
    MEASURE = "measure"
    MISC = "misc"
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class ColumnConfig(BaseModel):
    # Configuration of a single column of a model. Only "name" is required.
    # (Documented with comments on purpose: a class docstring would show up in the pydantic JSON schema.)
    name: str = Field(description="The name of the column")
    type: str = Field(
        default="",
        description="The type of the column such as 'string', 'integer', 'float', 'boolean', 'datetime', etc.",
    )
    condition: str = Field(
        default="",
        description="The condition of when the column is included",
    )
    description: str = Field(
        default="",
        description="The description of the column",
    )
    category: ColumnCategory = Field(
        default=ColumnCategory.MISC,
        description="The category of the column, either 'dimension', 'measure', or 'misc'",
    )
    depends_on: set[str] = Field(
        default_factory=set,
        description="List of dependent columns",
    )
    pass_through: bool = Field(
        default=False,
        description="Whether the column should be passed through to the federate",
    )
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class ModelConfig(BaseModel):
    # Configuration shared by all model types: a description and the list of column configurations.
    # (Documented with comments on purpose: a class docstring would show up in the pydantic JSON schema.)
    description: str = Field(
        default="",
        description="The description of the model",
    )
    columns: list[ColumnConfig] = Field(
        default_factory=list,
        description="The columns of the model",
    )
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class SeedConfig(ModelConfig):
    # Model configuration for seeds; adds the option to cast the column types.
    cast_column_types: bool = Field(
        default=False,
        description="Whether the column types should be cast to the appropriate type",
    )
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class ConnectionInterface(BaseModel):
    # Mixin for configurations that refer to a connection by name.
    # The name may be left unset at first and filled in later by finalize_connection().
    connection: str | None = Field(
        default=None,
        description="The connection name of the source model / database view",
    )

    def finalize_connection(self, env_vars: dict[str, str]):
        """
        Fill in the connection name when none was given, then return this object.

        The name is read from the environment variables, falling back to "default".
        """
        if self.connection is not None:
            return self
        self.connection = env_vars.get(c.SQRL_CONNECTIONS_DEFAULT_NAME_USED, "default")
        return self

    def get_connection(self) -> str:
        """
        Return the connection name, which must have been set by now.
        """
        connection_name = self.connection
        assert connection_name is not None, "Connection must be set"
        return connection_name
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class QueryModelConfig(ModelConfig):
    # Model configuration for models defined by a query; adds the set of dependencies.
    depends_on: set[str] = Field(
        default_factory=set,
        description="The dependencies of the model",
    )
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class BuildModelConfig(QueryModelConfig):
    # Configuration of a build model; adds how the model is materialized (table or view).
    materialization: str = Field(
        default="TABLE",
        description="The materialization of the model (ignored if Python model which is always a table)",
    )

    def get_sql_for_build(self, model_name: str, select_query: str) -> str:
        """
        Wrap the select query in a "CREATE OR REPLACE TABLE/VIEW <model_name> AS" statement.

        The materialization is matched case-insensitively. A ValueError is raised if it is neither TABLE nor VIEW.
        """
        object_type = self.materialization.upper()
        if object_type not in ("TABLE", "VIEW"):
            raise ValueError(f"Invalid materialization: {self.materialization}")

        return f"CREATE OR REPLACE {object_type} {model_name} AS\n" + select_query
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class DbviewModelConfig(ConnectionInterface, QueryModelConfig):
    # Configuration of a dbview model: a query model tied to a connection, with an opt-in DuckDB translation.
    translate_to_duckdb: bool = Field(
        default=False,
        description="Whether to translate the query to DuckDB and use DuckDB tables at runtime",
    )
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class FederateModelConfig(QueryModelConfig):
    # Configuration of a federate model; "eager" selects a table instead of a view.
    eager: bool = Field(
        default=False,
        description="Whether the model should be materialized for SQL models",
    )

    def get_sql_for_create(self, model_name: str, select_query: str) -> str:
        """
        Wrap the select query in a "CREATE TABLE <model_name> AS" statement if eager, or "CREATE VIEW ..." otherwise.
        """
        if self.eager:
            object_type = "TABLE"
        else:
            object_type = "VIEW"
        return f"CREATE {object_type} {model_name} AS\n" + select_query
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
from abc import ABCMeta
|
|
2
|
+
from dataclasses import dataclass, field
|
|
3
|
+
from typing import Callable, Generic, TypeVar, Any
|
|
4
|
+
import polars as pl, pandas as pd
|
|
5
|
+
|
|
6
|
+
from .arguments.run_time_args import BuildModelArgs
|
|
7
|
+
from ._model_configs import ModelConfig
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# Input query file classes
|
|
11
|
+
|
|
12
|
+
@dataclass(frozen=True)
class QueryFile(metaclass=ABCMeta):
    """
    Immutable base class that pairs a file path with the raw query taken from it.
    """
    filepath: str
    raw_query: Any  # subclasses narrow this to a more specific type
|
|
16
|
+
|
|
17
|
+
@dataclass(frozen=True)
class SqlQueryFile(QueryFile):
    """
    Query file whose raw query is a string.
    """
    raw_query: str
|
|
20
|
+
|
|
21
|
+
@dataclass(frozen=True)
class PyQueryFile(QueryFile):
    """
    Query file whose raw query is a callable that takes the build model arguments and returns a dataframe.
    """
    raw_query: Callable[[BuildModelArgs], pl.LazyFrame | pd.DataFrame]
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# Type variables for the query file type and the model configuration type
Q = TypeVar("Q", bound=QueryFile)
M = TypeVar("M", bound=ModelConfig)
|
|
28
|
+
|
|
29
|
+
@dataclass(frozen=True)
class QueryFileWithConfig(Generic[Q, M]):
    """
    Immutable pair of a query file and the model configuration that belongs to it.
    """
    query_file: Q
    config: M
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
# Compiled query classes
|
|
36
|
+
|
|
37
|
+
@dataclass
class Query(metaclass=ABCMeta):
    """
    Base class for the compiled query classes.
    """
    query: Any  # subclasses narrow this to a more specific type
|
|
40
|
+
|
|
41
|
+
@dataclass
class WorkInProgress(Query):
    """
    Placeholder query that holds no query yet: "query" is always None at construction.
    """
    # init=False keeps "query" out of the constructor
    query: None = field(default=None, init=False)
|
|
44
|
+
|
|
45
|
+
@dataclass
class SqlModelQuery(Query):
    """
    Compiled query given as a SQL string, with a flag named "is_duckdb".
    """
    query: str
    is_duckdb: bool  # presumably whether the SQL is to be run by DuckDB - confirm against callers
|
|
49
|
+
|
|
50
|
+
@dataclass
class PyModelQuery(Query):
    """
    Compiled query given as a callable that takes no arguments and returns a dataframe.
    """
    query: Callable[[], pl.LazyFrame | pd.DataFrame]
|