squirrels 0.5.0b3__py3-none-any.whl → 0.6.0.post0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- squirrels/__init__.py +4 -0
- squirrels/_api_routes/__init__.py +5 -0
- squirrels/_api_routes/auth.py +337 -0
- squirrels/_api_routes/base.py +196 -0
- squirrels/_api_routes/dashboards.py +156 -0
- squirrels/_api_routes/data_management.py +148 -0
- squirrels/_api_routes/datasets.py +220 -0
- squirrels/_api_routes/project.py +289 -0
- squirrels/_api_server.py +440 -792
- squirrels/_arguments/__init__.py +0 -0
- squirrels/_arguments/{_init_time_args.py → init_time_args.py} +23 -43
- squirrels/_arguments/{_run_time_args.py → run_time_args.py} +32 -68
- squirrels/_auth.py +590 -264
- squirrels/_command_line.py +130 -58
- squirrels/_compile_prompts.py +147 -0
- squirrels/_connection_set.py +16 -15
- squirrels/_constants.py +36 -11
- squirrels/_dashboards.py +179 -0
- squirrels/_data_sources.py +40 -34
- squirrels/_dataset_types.py +16 -11
- squirrels/_env_vars.py +209 -0
- squirrels/_exceptions.py +9 -37
- squirrels/_http_error_responses.py +52 -0
- squirrels/_initializer.py +7 -6
- squirrels/_logging.py +121 -0
- squirrels/_manifest.py +155 -77
- squirrels/_mcp_server.py +578 -0
- squirrels/_model_builder.py +11 -55
- squirrels/_model_configs.py +5 -5
- squirrels/_model_queries.py +1 -1
- squirrels/_models.py +276 -143
- squirrels/_package_data/base_project/.env +1 -24
- squirrels/_package_data/base_project/.env.example +31 -17
- squirrels/_package_data/base_project/connections.yml +4 -3
- squirrels/_package_data/base_project/dashboards/dashboard_example.py +13 -7
- squirrels/_package_data/base_project/dashboards/dashboard_example.yml +6 -6
- squirrels/_package_data/base_project/docker/Dockerfile +2 -2
- squirrels/_package_data/base_project/docker/compose.yml +1 -1
- squirrels/_package_data/base_project/duckdb_init.sql +1 -0
- squirrels/_package_data/base_project/models/builds/build_example.py +2 -2
- squirrels/_package_data/base_project/models/dbviews/dbview_example.sql +7 -2
- squirrels/_package_data/base_project/models/dbviews/dbview_example.yml +16 -10
- squirrels/_package_data/base_project/models/federates/federate_example.py +27 -17
- squirrels/_package_data/base_project/models/federates/federate_example.sql +3 -7
- squirrels/_package_data/base_project/models/federates/federate_example.yml +7 -7
- squirrels/_package_data/base_project/models/sources.yml +5 -6
- squirrels/_package_data/base_project/parameters.yml +24 -38
- squirrels/_package_data/base_project/pyconfigs/connections.py +8 -3
- squirrels/_package_data/base_project/pyconfigs/context.py +26 -14
- squirrels/_package_data/base_project/pyconfigs/parameters.py +124 -81
- squirrels/_package_data/base_project/pyconfigs/user.py +48 -15
- squirrels/_package_data/base_project/resources/public/.gitkeep +0 -0
- squirrels/_package_data/base_project/seeds/seed_categories.yml +1 -1
- squirrels/_package_data/base_project/seeds/seed_subcategories.yml +1 -1
- squirrels/_package_data/base_project/squirrels.yml.j2 +21 -31
- squirrels/_package_data/templates/login_successful.html +53 -0
- squirrels/_package_data/templates/squirrels_studio.html +22 -0
- squirrels/_parameter_configs.py +43 -22
- squirrels/_parameter_options.py +1 -1
- squirrels/_parameter_sets.py +41 -30
- squirrels/_parameters.py +560 -123
- squirrels/_project.py +487 -277
- squirrels/_py_module.py +71 -10
- squirrels/_request_context.py +33 -0
- squirrels/_schemas/__init__.py +0 -0
- squirrels/_schemas/auth_models.py +83 -0
- squirrels/_schemas/query_param_models.py +70 -0
- squirrels/_schemas/request_models.py +26 -0
- squirrels/_schemas/response_models.py +286 -0
- squirrels/_seeds.py +52 -13
- squirrels/_sources.py +29 -23
- squirrels/_utils.py +221 -42
- squirrels/_version.py +1 -3
- squirrels/arguments.py +7 -2
- squirrels/auth.py +4 -0
- squirrels/connections.py +2 -0
- squirrels/dashboards.py +3 -1
- squirrels/data_sources.py +6 -0
- squirrels/parameter_options.py +5 -0
- squirrels/parameters.py +5 -0
- squirrels/types.py +10 -3
- squirrels-0.6.0.post0.dist-info/METADATA +148 -0
- squirrels-0.6.0.post0.dist-info/RECORD +101 -0
- {squirrels-0.5.0b3.dist-info → squirrels-0.6.0.post0.dist-info}/WHEEL +1 -1
- squirrels/_api_response_models.py +0 -190
- squirrels/_dashboard_types.py +0 -82
- squirrels/_dashboards_io.py +0 -79
- squirrels-0.5.0b3.dist-info/METADATA +0 -110
- squirrels-0.5.0b3.dist-info/RECORD +0 -80
- /squirrels/_package_data/base_project/{assets → resources}/expenses.db +0 -0
- /squirrels/_package_data/base_project/{assets → resources}/weather.db +0 -0
- {squirrels-0.5.0b3.dist-info → squirrels-0.6.0.post0.dist-info}/entry_points.txt +0 -0
- {squirrels-0.5.0b3.dist-info → squirrels-0.6.0.post0.dist-info}/licenses/LICENSE +0 -0
squirrels/_model_builder.py
CHANGED
|
@@ -1,31 +1,26 @@
|
|
|
1
1
|
from dataclasses import dataclass, field
|
|
2
|
-
import
|
|
2
|
+
import duckdb, time
|
|
3
3
|
|
|
4
4
|
from . import _utils as u, _connection_set as cs, _models as m
|
|
5
|
-
from ._exceptions import InvalidInputError
|
|
6
5
|
|
|
7
6
|
|
|
8
7
|
@dataclass
|
|
9
8
|
class ModelBuilder:
|
|
10
|
-
|
|
9
|
+
_datalake_db_path: str
|
|
11
10
|
_conn_set: cs.ConnectionSet
|
|
12
11
|
_static_models: dict[str, m.StaticModel]
|
|
13
|
-
_conn_args: cs.ConnectionsArgs
|
|
12
|
+
_conn_args: cs.ConnectionsArgs
|
|
14
13
|
_logger: u.Logger = field(default_factory=lambda: u.Logger(""))
|
|
15
14
|
|
|
16
|
-
def _attach_connections(self, duckdb_conn: duckdb.DuckDBPyConnection) ->
|
|
17
|
-
dialect_by_conn_name: dict[str, str] = {}
|
|
15
|
+
def _attach_connections(self, duckdb_conn: duckdb.DuckDBPyConnection) -> None:
|
|
18
16
|
for conn_name, conn_props in self._conn_set.get_connections_as_dict().items():
|
|
19
17
|
if not isinstance(conn_props, m.ConnectionProperties):
|
|
20
18
|
continue
|
|
21
|
-
dialect = conn_props.dialect
|
|
22
19
|
attach_uri = conn_props.attach_uri_for_duckdb
|
|
23
20
|
if attach_uri is None:
|
|
24
21
|
continue # skip unsupported dialects
|
|
25
|
-
attach_stmt = f"ATTACH IF NOT EXISTS '{attach_uri}' AS db_{conn_name} (
|
|
22
|
+
attach_stmt = f"ATTACH IF NOT EXISTS '{attach_uri}' AS db_{conn_name} (READ_ONLY)"
|
|
26
23
|
u.run_duckdb_stmt(self._logger, duckdb_conn, attach_stmt, redacted_values=[attach_uri])
|
|
27
|
-
dialect_by_conn_name[conn_name] = dialect
|
|
28
|
-
return dialect_by_conn_name
|
|
29
24
|
|
|
30
25
|
async def _build_models(self, duckdb_conn: duckdb.DuckDBPyConnection, select: str | None, full_refresh: bool) -> None:
|
|
31
26
|
"""
|
|
@@ -50,64 +45,25 @@ class ModelBuilder:
|
|
|
50
45
|
coroutines = []
|
|
51
46
|
for model_name in terminal_nodes:
|
|
52
47
|
model = self._static_models[model_name]
|
|
48
|
+
# await model.build_model(duckdb_conn, full_refresh)
|
|
53
49
|
coro = model.build_model(duckdb_conn, full_refresh)
|
|
54
50
|
coroutines.append(coro)
|
|
55
51
|
await u.asyncio_gather(coroutines)
|
|
56
52
|
|
|
57
|
-
async def build(self, full_refresh: bool, select: str | None
|
|
53
|
+
async def build(self, full_refresh: bool, select: str | None) -> None:
|
|
58
54
|
start = time.time()
|
|
59
55
|
|
|
60
|
-
#
|
|
61
|
-
|
|
62
|
-
duckdb_path.parent.mkdir(parents=True, exist_ok=True)
|
|
63
|
-
|
|
64
|
-
# Delete any existing DuckDB file if full refresh is requested
|
|
65
|
-
duckdb_dev_path = u.Path(self._duckdb_venv_path + ".dev")
|
|
66
|
-
duckdb_stg_path = u.Path(self._duckdb_venv_path + ".stg")
|
|
67
|
-
|
|
68
|
-
# If the development copy is already in use, a concurrent build is not allowed
|
|
69
|
-
duckdb_dev_lock_path = u.Path(self._duckdb_venv_path + ".dev.lock")
|
|
70
|
-
if duckdb_dev_lock_path.exists():
|
|
71
|
-
raise InvalidInputError(60, "An existing build process is already running and a concurrent build is not allowed")
|
|
72
|
-
duckdb_dev_lock_path.touch(exist_ok=False)
|
|
73
|
-
|
|
74
|
-
# Ensure the lock file is deleted even if an exception is raised
|
|
75
|
-
try:
|
|
76
|
-
# If not full refresh, create a development copy of the existing virtual data environment
|
|
77
|
-
if not full_refresh:
|
|
78
|
-
if duckdb_stg_path.exists():
|
|
79
|
-
duckdb_stg_path.replace(duckdb_dev_path)
|
|
80
|
-
elif duckdb_path.exists():
|
|
81
|
-
shutil.copy(duckdb_path, duckdb_dev_path)
|
|
82
|
-
else:
|
|
83
|
-
duckdb_dev_path.unlink(missing_ok=True) # delete any lingering development copy to create a fresh one later
|
|
84
|
-
|
|
85
|
-
self._logger.log_activity_time("creating development copy of virtual data environment", start)
|
|
86
|
-
|
|
87
|
-
# Connect to DuckDB file
|
|
88
|
-
duckdb_conn = u.create_duckdb_connection(duckdb_dev_path)
|
|
89
|
-
|
|
90
|
-
except Exception:
|
|
91
|
-
duckdb_dev_lock_path.unlink()
|
|
92
|
-
raise
|
|
56
|
+
# Connect directly to DuckLake instead of attaching (supports concurrent connections)
|
|
57
|
+
duckdb_conn = u.create_duckdb_connection(self._datalake_db_path)
|
|
93
58
|
|
|
94
|
-
# Sometimes code after conn.close() doesn't run (as if the python process is killed but no error is raised)
|
|
95
|
-
# Using a new try block to ensure the lock file is removed before closing the connection
|
|
96
59
|
try:
|
|
97
60
|
# Attach connections
|
|
98
61
|
self._attach_connections(duckdb_conn)
|
|
99
62
|
|
|
100
63
|
# Construct build models
|
|
101
64
|
await self._build_models(duckdb_conn, select, full_refresh)
|
|
102
|
-
|
|
65
|
+
|
|
103
66
|
finally:
|
|
104
|
-
duckdb_dev_lock_path.unlink()
|
|
105
67
|
duckdb_conn.close()
|
|
106
68
|
|
|
107
|
-
|
|
108
|
-
if stage_file:
|
|
109
|
-
duckdb_dev_path.replace(duckdb_stg_path)
|
|
110
|
-
else:
|
|
111
|
-
duckdb_dev_path.replace(duckdb_path)
|
|
112
|
-
|
|
113
|
-
self._logger.log_activity_time("TOTAL TIME to build virtual data environment", start)
|
|
69
|
+
self._logger.log_activity_time("TOTAL TIME to build the Virtual Data Lake (VDL)", start)
|
squirrels/_model_configs.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from enum import Enum
|
|
2
2
|
from pydantic import BaseModel, Field
|
|
3
3
|
|
|
4
|
-
from . import
|
|
4
|
+
from ._env_vars import SquirrelsEnvVars
|
|
5
5
|
|
|
6
6
|
|
|
7
7
|
class ColumnCategory(Enum):
|
|
@@ -13,7 +13,7 @@ class ColumnCategory(Enum):
|
|
|
13
13
|
class ColumnConfig(BaseModel):
|
|
14
14
|
name: str = Field(description="The name of the column")
|
|
15
15
|
type: str = Field(default="", description="The type of the column such as 'string', 'integer', 'float', 'boolean', 'datetime', etc.")
|
|
16
|
-
condition: str = Field(
|
|
16
|
+
condition: list[str] = Field(default_factory=list, description="The condition(s) of when the column is included. Only for documentation purposes.")
|
|
17
17
|
description: str = Field(default="", description="The description of the column")
|
|
18
18
|
category: ColumnCategory = Field(default=ColumnCategory.MISC, description="The category of the column, either 'dimension', 'measure', or 'misc'")
|
|
19
19
|
depends_on: set[str] = Field(default_factory=set, description="List of dependent columns")
|
|
@@ -32,9 +32,9 @@ class SeedConfig(ModelConfig):
|
|
|
32
32
|
class ConnectionInterface(BaseModel):
|
|
33
33
|
connection: str | None = Field(default=None, description="The connection name of the source model / database view")
|
|
34
34
|
|
|
35
|
-
def finalize_connection(self,
|
|
35
|
+
def finalize_connection(self, *, default_conn_name: str = "default"):
|
|
36
36
|
if self.connection is None:
|
|
37
|
-
self.connection =
|
|
37
|
+
self.connection = default_conn_name
|
|
38
38
|
return self
|
|
39
39
|
|
|
40
40
|
def get_connection(self) -> str:
|
|
@@ -66,7 +66,7 @@ class DbviewModelConfig(ConnectionInterface, QueryModelConfig):
|
|
|
66
66
|
|
|
67
67
|
|
|
68
68
|
class FederateModelConfig(QueryModelConfig):
|
|
69
|
-
eager: bool = Field(default=False, description="Whether the model should be materialized for SQL models")
|
|
69
|
+
eager: bool = Field(default=False, description="Whether the model should always be materialized in memory for SQL models")
|
|
70
70
|
|
|
71
71
|
def get_sql_for_create(self, model_name: str, select_query: str) -> str:
|
|
72
72
|
materialization = "TABLE" if self.eager else "VIEW"
|
squirrels/_model_queries.py
CHANGED
|
@@ -3,7 +3,7 @@ from dataclasses import dataclass, field
|
|
|
3
3
|
from typing import Callable, Generic, TypeVar, Any
|
|
4
4
|
import polars as pl, pandas as pd
|
|
5
5
|
|
|
6
|
-
from ._arguments.
|
|
6
|
+
from ._arguments.run_time_args import BuildModelArgs
|
|
7
7
|
from ._model_configs import ModelConfig
|
|
8
8
|
|
|
9
9
|
|