squirrels 0.5.0b4__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of squirrels might be problematic. Click here for more details.
- squirrels/__init__.py +2 -0
- squirrels/_api_routes/auth.py +83 -74
- squirrels/_api_routes/base.py +58 -41
- squirrels/_api_routes/dashboards.py +37 -21
- squirrels/_api_routes/data_management.py +72 -27
- squirrels/_api_routes/datasets.py +107 -84
- squirrels/_api_routes/oauth2.py +11 -13
- squirrels/_api_routes/project.py +71 -33
- squirrels/_api_server.py +130 -63
- squirrels/_arguments/run_time_args.py +9 -9
- squirrels/_auth.py +117 -162
- squirrels/_command_line.py +68 -32
- squirrels/_compile_prompts.py +147 -0
- squirrels/_connection_set.py +11 -2
- squirrels/_constants.py +22 -8
- squirrels/_data_sources.py +38 -32
- squirrels/_dataset_types.py +2 -4
- squirrels/_initializer.py +1 -1
- squirrels/_logging.py +117 -0
- squirrels/_manifest.py +125 -58
- squirrels/_model_builder.py +10 -54
- squirrels/_models.py +224 -108
- squirrels/_package_data/base_project/.env +15 -4
- squirrels/_package_data/base_project/.env.example +14 -3
- squirrels/_package_data/base_project/connections.yml +4 -3
- squirrels/_package_data/base_project/dashboards/dashboard_example.py +2 -2
- squirrels/_package_data/base_project/dashboards/dashboard_example.yml +4 -4
- squirrels/_package_data/base_project/duckdb_init.sql +1 -0
- squirrels/_package_data/base_project/models/dbviews/dbview_example.sql +7 -2
- squirrels/_package_data/base_project/models/dbviews/dbview_example.yml +16 -10
- squirrels/_package_data/base_project/models/federates/federate_example.py +22 -15
- squirrels/_package_data/base_project/models/federates/federate_example.sql +3 -7
- squirrels/_package_data/base_project/models/federates/federate_example.yml +1 -1
- squirrels/_package_data/base_project/models/sources.yml +5 -6
- squirrels/_package_data/base_project/parameters.yml +24 -38
- squirrels/_package_data/base_project/pyconfigs/connections.py +5 -1
- squirrels/_package_data/base_project/pyconfigs/context.py +23 -12
- squirrels/_package_data/base_project/pyconfigs/parameters.py +68 -33
- squirrels/_package_data/base_project/pyconfigs/user.py +11 -18
- squirrels/_package_data/base_project/seeds/seed_categories.yml +1 -1
- squirrels/_package_data/base_project/seeds/seed_subcategories.yml +1 -1
- squirrels/_package_data/base_project/squirrels.yml.j2 +18 -28
- squirrels/_package_data/templates/squirrels_studio.html +20 -0
- squirrels/_parameter_configs.py +43 -22
- squirrels/_parameter_options.py +1 -1
- squirrels/_parameter_sets.py +8 -10
- squirrels/_project.py +351 -234
- squirrels/_request_context.py +33 -0
- squirrels/_schemas/auth_models.py +32 -9
- squirrels/_schemas/query_param_models.py +9 -1
- squirrels/_schemas/response_models.py +36 -10
- squirrels/_seeds.py +1 -1
- squirrels/_sources.py +23 -19
- squirrels/_utils.py +83 -35
- squirrels/_version.py +1 -1
- squirrels/arguments.py +5 -0
- squirrels/auth.py +4 -1
- squirrels/connections.py +2 -0
- squirrels/dashboards.py +3 -1
- squirrels/data_sources.py +6 -0
- squirrels/parameter_options.py +5 -0
- squirrels/parameters.py +5 -0
- squirrels/types.py +6 -1
- {squirrels-0.5.0b4.dist-info → squirrels-0.5.1.dist-info}/METADATA +28 -13
- squirrels-0.5.1.dist-info/RECORD +98 -0
- squirrels-0.5.0b4.dist-info/RECORD +0 -94
- {squirrels-0.5.0b4.dist-info → squirrels-0.5.1.dist-info}/WHEEL +0 -0
- {squirrels-0.5.0b4.dist-info → squirrels-0.5.1.dist-info}/entry_points.txt +0 -0
- {squirrels-0.5.0b4.dist-info → squirrels-0.5.1.dist-info}/licenses/LICENSE +0 -0
squirrels/_project.py
CHANGED
|
@@ -1,95 +1,113 @@
|
|
|
1
|
-
from dotenv import dotenv_values
|
|
2
|
-
from uuid import uuid4
|
|
1
|
+
from dotenv import dotenv_values, load_dotenv
|
|
3
2
|
from pathlib import Path
|
|
4
3
|
import asyncio, typing as t, functools as ft, shutil, json, os
|
|
5
|
-
import
|
|
6
|
-
import sqlglot, sqlglot.expressions
|
|
4
|
+
import sqlglot, sqlglot.expressions, duckdb, polars as pl
|
|
7
5
|
|
|
8
|
-
from ._auth import Authenticator,
|
|
6
|
+
from ._auth import Authenticator, AuthProviderArgs, ProviderFunctionType
|
|
7
|
+
from ._schemas.auth_models import CustomUserFields, AbstractUser, GuestUser, RegisteredUser
|
|
9
8
|
from ._schemas import response_models as rm
|
|
10
9
|
from ._model_builder import ModelBuilder
|
|
11
10
|
from ._exceptions import InvalidInputError, ConfigurationError
|
|
12
11
|
from ._py_module import PyModule
|
|
13
12
|
from . import _dashboards as d, _utils as u, _constants as c, _manifest as mf, _connection_set as cs
|
|
14
13
|
from . import _seeds as s, _models as m, _model_configs as mc, _model_queries as mq, _sources as so
|
|
15
|
-
from . import _parameter_sets as ps, _dataset_types as dr
|
|
14
|
+
from . import _parameter_sets as ps, _dataset_types as dr, _logging as l
|
|
16
15
|
|
|
17
16
|
T = t.TypeVar("T", bound=d.Dashboard)
|
|
18
17
|
M = t.TypeVar("M", bound=m.DataModel)
|
|
19
18
|
|
|
20
19
|
|
|
21
|
-
class _CustomJsonFormatter(l.Formatter):
|
|
22
|
-
def format(self, record: l.LogRecord) -> str:
|
|
23
|
-
super().format(record)
|
|
24
|
-
info = {
|
|
25
|
-
"timestamp": self.formatTime(record),
|
|
26
|
-
"project_id": record.name,
|
|
27
|
-
"level": record.levelname,
|
|
28
|
-
"message": record.getMessage(),
|
|
29
|
-
"thread": record.thread,
|
|
30
|
-
"thread_name": record.threadName,
|
|
31
|
-
"process": record.process,
|
|
32
|
-
**record.__dict__.get("info", {})
|
|
33
|
-
}
|
|
34
|
-
output = {
|
|
35
|
-
"data": record.__dict__.get("data", {}),
|
|
36
|
-
"info": info
|
|
37
|
-
}
|
|
38
|
-
return json.dumps(output)
|
|
39
|
-
|
|
40
|
-
|
|
41
20
|
class SquirrelsProject:
|
|
42
21
|
"""
|
|
43
22
|
Initiate an instance of this class to interact with a Squirrels project through Python code. For example this can be handy to experiment with the datasets produced by Squirrels in a Jupyter notebook.
|
|
44
23
|
"""
|
|
45
24
|
|
|
46
|
-
def __init__(
|
|
25
|
+
def __init__(
    self,
    *,
    filepath: str = ".",
    load_dotenv_globally: bool = False,
    log_to_file: bool = False,
    log_level: str | None = None,
    log_format: str | None = None,
) -> None:
    """
    Set up a handle on a Squirrels project located at the given path.

    The logger is created and the Virtual Data Lake (VDL) attachment is verified eagerly; the
    remaining project contents are exposed through cached properties of this class.

    Arguments:
        filepath: The path to the Squirrels project file. Defaults to the current working directory.
        load_dotenv_globally: If True, each dotenv file of the project is also passed to `load_dotenv` when the environment variables are first read. Default is False.
        log_to_file: Whether to enable logging to file(s) in the "logs/" folder with rotation and retention policies. Default is False.
        log_level: The logging level to use. Options are "DEBUG", "INFO", and "WARNING". Default is from SQRL_LOGGING__LOG_LEVEL environment variable or "INFO".
        log_format: The format of the log records. Options are "text" and "json". Default is from SQRL_LOGGING__LOG_FORMAT environment variable or "text".
    """
    self._filepath = filepath
    # Must be assigned before the logger is built: _get_logger reads self._env_vars,
    # which in turn reads this flag.
    self._load_dotenv_globally = load_dotenv_globally
    self._logger = self._get_logger(
        filepath, log_to_file, log_level, log_format
    )
    self._ensure_virtual_datalake_exists(filepath)
|
|
42
|
+
|
|
43
|
+
def _get_logger(self, filepath: str, log_to_file: bool, log_level: str | None, log_format: str | None) -> u.Logger:
    """
    Build the project logger from explicit arguments, falling back to environment variables.

    Arguments:
        filepath: The project path, forwarded to the logger factory.
        log_to_file: If falsy, the SQRL logging "log to file" environment setting decides instead.
        log_level: Explicit level, or None to use the environment setting (fallback "INFO").
        log_format: Explicit format, or None to use the environment setting (fallback "text").

    Returns:
        The logger produced by the project's logging module.
    """
    settings = self._env_vars

    # Explicitly passed (CLI) arguments take precedence over environment variables
    if log_level is None:
        log_level = settings.get(c.SQRL_LOGGING_LOG_LEVEL, "INFO")
    if log_format is None:
        log_format = settings.get(c.SQRL_LOGGING_LOG_FORMAT, "text")
    if not log_to_file:
        log_to_file = u.to_bool(settings.get(c.SQRL_LOGGING_LOG_TO_FILE, "false"))

    # File rotation settings are only configurable through the environment
    max_size_mb = int(settings.get(c.SQRL_LOGGING_LOG_FILE_SIZE_MB, 50))
    backup_count = int(settings.get(c.SQRL_LOGGING_LOG_FILE_BACKUP_COUNT, 1))

    return l.get_logger(filepath, log_to_file, log_level, log_format, max_size_mb, backup_count)
|
|
52
|
+
|
|
53
|
+
def _ensure_virtual_datalake_exists(self, project_path: str) -> None:
    """
    Create the target folder and verify that the Virtual Data Lake (VDL) can be attached.

    The VDL catalog is attached on a temporary DuckDB connection, after which old snapshots
    are expired and old files are cleaned up.

    Arguments:
        project_path: The path to the Squirrels project. Also substituted into the
            "{project_path}" placeholder of the configured data path.

    Raises:
        ConfigurationError: If the VDL cannot be attached or cleaned up. The underlying
            exception is chained as the cause.
    """
    def quote(value: str) -> str:
        # Double any single quotes so the value stays inside its SQL string literal
        return value.replace("'", "''")

    target_path = u.Path(project_path, c.TARGET_FOLDER)
    target_path.mkdir(parents=True, exist_ok=True)

    # Resolved before the try block: the error handler below reads is_ducklake, so it must
    # always be bound (otherwise a failure here would surface as an UnboundLocalError).
    datalake_db_path = self._datalake_db_path
    is_ducklake = datalake_db_path.startswith("ducklake:")

    # Attempt to set up the virtual data lake with DATA_PATH if possible
    try:
        data_path = self._env_vars.get(c.SQRL_VDL_DATA_PATH, c.DEFAULT_VDL_DATA_PATH)
        data_path = data_path.format(project_path=project_path)

        options = f"(DATA_PATH '{quote(data_path)}')" if is_ducklake else ""
        attach_stmt = f"ATTACH '{quote(datalake_db_path)}' AS vdl {options}"
        with duckdb.connect() as conn:
            conn.execute(attach_stmt)
            # TODO: support incremental loads for build models and avoid cleaning up old files all the time
            conn.execute("CALL ducklake_expire_snapshots('vdl', older_than => now())")
            conn.execute("CALL ducklake_cleanup_old_files('vdl', cleanup_all => true)")

    except Exception as e:
        message = str(e)
        if "DATA_PATH parameter" in message:
            first_line = message.split("\n")[0]
            note = "NOTE: Squirrels does not allow changing the data path for an existing Virtual Data Lake (VDL)"
            raise u.ConfigurationError(f"{first_line}\n\n{note}") from e

        # A ducklake path that names no external metadata backend is taken to use DuckDB itself
        if is_ducklake and not any(x in datalake_db_path for x in [":sqlite:", ":postgres:", ":mysql:"]):
            extended_error = "\n  Note: if you're using DuckDB for the metadata database, only one process can connect to the VDL at a time."
        else:
            extended_error = ""

        raise u.ConfigurationError(f"Failed to attach Virtual Data Lake (VDL).{extended_error}") from e
|
|
84
84
|
|
|
85
85
|
@ft.cached_property
def _env_vars(self) -> dict[str, str]:
    """
    Collect the environment variables visible to the project.

    Values come from the process environment, overridden by the project's dotenv files.
    A later dotenv file overrides an earlier one, and dotenv keys without a value are ignored.
    When `load_dotenv_globally` was requested, each dotenv file is also passed to `load_dotenv`.
    """
    from_dotenv: dict[str, str] = {}
    for filename in (c.DOTENV_FILE, c.DOTENV_LOCAL_FILE):
        dotenv_path = u.Path(self._filepath, filename)
        if self._load_dotenv_globally:
            load_dotenv(dotenv_path)
        for key, value in dotenv_values(dotenv_path).items():
            if value is not None:
                from_dotenv[key] = value

    combined = dict(os.environ)
    combined.update(from_dotenv)
    return combined
|
|
92
95
|
|
|
96
|
+
@ft.cached_property
def _elevated_access_level(self) -> u.ACCESS_LEVEL:
    """
    Read the configured elevated access level (lowercased), defaulting to "admin".

    Raises:
        ConfigurationError: If the configured value is not one of "admin", "member", or "guest".
    """
    level = self._env_vars.get(c.SQRL_PERMISSIONS_ELEVATED_ACCESS_LEVEL, "admin").lower()

    if level in ("admin", "member", "guest"):
        return level

    raise u.ConfigurationError(f"{c.SQRL_PERMISSIONS_ELEVATED_ACCESS_LEVEL} has been set to an invalid access level: {level}")
|
|
104
|
+
|
|
105
|
+
@ft.cached_property
def _datalake_db_path(self) -> str:
    """
    Resolve the catalog database path of the Virtual Data Lake (VDL).

    The configured (or default) path is treated as a template whose "{project_path}"
    placeholder is filled in with the project's file path.
    """
    path_template = self._env_vars.get(c.SQRL_VDL_CATALOG_DB_PATH, c.DEFAULT_VDL_CATALOG_DB_PATH)
    return path_template.format(project_path=self._filepath)
|
|
110
|
+
|
|
93
111
|
@ft.cached_property
|
|
94
112
|
def _manifest_cfg(self) -> mf.ManifestConfig:
|
|
95
113
|
return mf.ManifestIO.load_from_file(self._logger, self._filepath, self._env_vars)
|
|
@@ -131,18 +149,19 @@ class SquirrelsProject:
|
|
|
131
149
|
return cs.ConnectionSetIO.load_from_file(self._logger, self._filepath, self._manifest_cfg, self._conn_args)
|
|
132
150
|
|
|
133
151
|
@ft.cached_property
def _custom_user_fields_cls_and_provider_functions(self) -> tuple[type[CustomUserFields], list[ProviderFunctionType]]:
    """
    Load the user configuration module and collect what it defines for authentication.

    Returns:
        A tuple of the CustomUserFields class to use (the built-in one if the user file does
        not define its own) and the provider functions found on `Authenticator.providers`.

    Raises:
        ConfigurationError: If "CustomUserFields" in the user file is not a class that
            inherits from CustomUserFields.
    """
    user_module_path = u.Path(self._filepath, c.PYCONFIGS_FOLDER, c.USER_FILE)
    user_module = PyModule(user_module_path)

    # Load CustomUserFields class (adds to Authenticator.providers as side effect)
    CustomUserFieldsCls = user_module.get_func_or_class("CustomUserFields", default_attr=CustomUserFields)

    # Take ownership of the collected providers and reset the class-level list
    provider_functions = Authenticator.providers
    Authenticator.providers = []

    # issubclass() raises TypeError for non-class objects (e.g. a function named
    # CustomUserFields), so check for a class first to report a ConfigurationError instead.
    is_valid_cls = isinstance(CustomUserFieldsCls, type) and issubclass(CustomUserFieldsCls, CustomUserFields)
    if not is_valid_cls:
        raise ConfigurationError(f"CustomUserFields class in '{c.USER_FILE}' must inherit from CustomUserFields")

    return CustomUserFieldsCls, provider_functions
|
|
146
165
|
|
|
147
166
|
@ft.cached_property
|
|
148
167
|
def _auth_args(self) -> AuthProviderArgs:
|
|
@@ -150,9 +169,20 @@ class SquirrelsProject:
|
|
|
150
169
|
return AuthProviderArgs(conn_args.project_path, conn_args.proj_vars, conn_args.env_vars)
|
|
151
170
|
|
|
152
171
|
@ft.cached_property
def _auth(self) -> Authenticator:
    """
    Build the project's Authenticator from the user configuration and the manifest.

    The authenticator is flagged as external-only when the manifest's authentication type
    is EXTERNAL.
    """
    fields_cls, providers = self._custom_user_fields_cls_and_provider_functions
    auth_type = self._manifest_cfg.authentication.type
    is_external_only = (auth_type == mf.AuthenticationType.EXTERNAL)
    return Authenticator(
        self._logger,
        self._filepath,
        self._auth_args,
        providers,
        custom_user_fields_cls=fields_cls,
        external_only=is_external_only,
    )
|
|
176
|
+
|
|
177
|
+
@ft.cached_property
def _guest_user(self) -> AbstractUser:
    """
    A guest user with an empty username and default-constructed custom fields.
    """
    return GuestUser(username="", custom_fields=self._auth.CustomUserFields())
|
|
181
|
+
|
|
182
|
+
@ft.cached_property
def _admin_user(self) -> AbstractUser:
    """
    A registered user with an empty username, "admin" access level, and default-constructed custom fields.
    """
    default_fields = self._auth.CustomUserFields()
    admin = RegisteredUser(username="", access_level="admin", custom_fields=default_fields)
    return admin
|
|
156
186
|
|
|
157
187
|
@ft.cached_property
|
|
158
188
|
def _param_args(self) -> ps.ParametersArgs:
|
|
@@ -162,7 +192,7 @@ class SquirrelsProject:
|
|
|
162
192
|
@ft.cached_property
def _param_cfg_set(self) -> ps.ParameterConfigsSet:
    """
    Load the set of parameter configurations for the project.

    The loader receives the logger, project path, manifest, seeds, connection set,
    parameter arguments, and the VDL catalog database path.
    """
    load = ps.ParameterConfigsSetIO.load_from_file
    # Arguments are gathered in the same left-to-right order as the loader expects them
    loader_args = (
        self._logger,
        self._filepath,
        self._manifest_cfg,
        self._seeds,
        self._conn_set,
        self._param_args,
        self._datalake_db_path,
    )
    return load(*loader_args)
|
|
167
197
|
|
|
168
198
|
@ft.cached_property
|
|
@@ -189,11 +219,6 @@ class SquirrelsProject:
|
|
|
189
219
|
env.filters["quote_and_join"] = quote_and_join
|
|
190
220
|
return env
|
|
191
221
|
|
|
192
|
-
@ft.cached_property
|
|
193
|
-
def _duckdb_venv_path(self) -> str:
|
|
194
|
-
duckdb_filepath_setting_val = self._env_vars.get(c.SQRL_DUCKDB_VENV_DB_FILE_PATH, f"{c.TARGET_FOLDER}/{c.DUCKDB_VENV_FILE}")
|
|
195
|
-
return str(Path(self._filepath, duckdb_filepath_setting_val))
|
|
196
|
-
|
|
197
222
|
def close(self) -> None:
|
|
198
223
|
"""
|
|
199
224
|
Deliberately close any open resources within the Squirrels project, such as database connections (instead of relying on the garbage collector).
|
|
@@ -228,20 +253,20 @@ class SquirrelsProject:
|
|
|
228
253
|
return models_dict
|
|
229
254
|
|
|
230
255
|
|
|
231
|
-
async def build(self, *, full_refresh: bool = False, select: str | None = None
|
|
256
|
+
async def build(self, *, full_refresh: bool = False, select: str | None = None) -> None:
    """
    Build the Virtual Data Lake (VDL) for the Squirrels project

    Arguments:
        full_refresh: Whether to drop all tables and rebuild the VDL from scratch. Default is False.
        select: The name of a specific model to build. If None, all models are built. Default is None.
    """
    static_models: dict[str, m.StaticModel] = self._get_static_models()
    vdl_builder = ModelBuilder(
        self._datalake_db_path,
        self._conn_set,
        static_models,
        self._conn_args,
        self._logger,
    )
    await vdl_builder.build(full_refresh, select)
|
|
242
267
|
|
|
243
268
|
def _get_models_dict(self, always_python_df: bool) -> dict[str, m.DataModel]:
|
|
244
|
-
models_dict: dict[str, m.DataModel] =
|
|
269
|
+
models_dict: dict[str, m.DataModel] = self._get_static_models()
|
|
245
270
|
|
|
246
271
|
for name, val in self._dbview_model_files.items():
|
|
247
272
|
self._add_model(models_dict, m.DbviewModel(
|
|
@@ -257,19 +282,18 @@ class SquirrelsProject:
|
|
|
257
282
|
|
|
258
283
|
return models_dict
|
|
259
284
|
|
|
260
|
-
def _generate_dag(self, dataset: str
|
|
261
|
-
models_dict = self._get_models_dict(always_python_df)
|
|
285
|
+
def _generate_dag(self, dataset: str) -> m.DAG:
    """
    Create the DAG whose target is the model configured for the given dataset.

    Arguments:
        dataset: The name of a dataset defined in the manifest.

    Returns:
        A DAG over all models of the project, with the dataset's model marked as the target.
    """
    all_models = self._get_models_dict(always_python_df=False)
    dataset_cfg = self._manifest_cfg.datasets[dataset]

    # The dataset's configured model becomes the target of the DAG
    target = all_models[dataset_cfg.model]
    target.is_target = True

    return m.DAG(dataset_cfg, target, all_models, self._datalake_db_path, self._logger)
|
|
270
294
|
|
|
271
|
-
def _generate_dag_with_fake_target(self, sql_query: str | None) -> m.DAG:
|
|
272
|
-
models_dict = self._get_models_dict(always_python_df=
|
|
295
|
+
def _generate_dag_with_fake_target(self, sql_query: str | None, *, always_python_df: bool = False) -> m.DAG:
|
|
296
|
+
models_dict = self._get_models_dict(always_python_df=always_python_df)
|
|
273
297
|
|
|
274
298
|
if sql_query is None:
|
|
275
299
|
dependencies = set(models_dict.keys())
|
|
@@ -279,54 +303,45 @@ class SquirrelsProject:
|
|
|
279
303
|
substitutions = {}
|
|
280
304
|
for model_name in dependencies:
|
|
281
305
|
model = models_dict[model_name]
|
|
282
|
-
if isinstance(model, m.SourceModel) and not model.
|
|
283
|
-
raise InvalidInputError(400, "
|
|
284
|
-
if isinstance(model,
|
|
285
|
-
substitutions[model_name] = f"
|
|
306
|
+
if isinstance(model, m.SourceModel) and not model.is_queryable:
|
|
307
|
+
raise InvalidInputError(400, "cannot_query_source_model", f"Source model '{model_name}' cannot be queried with DuckDB")
|
|
308
|
+
if isinstance(model, m.BuildModel):
|
|
309
|
+
substitutions[model_name] = f"vdl.{model_name}"
|
|
310
|
+
elif isinstance(model, m.SourceModel):
|
|
311
|
+
if model.model_config.load_to_vdl:
|
|
312
|
+
substitutions[model_name] = f"vdl.{model_name}"
|
|
313
|
+
else:
|
|
314
|
+
# DuckDB connection without load_to_vdl - reference via attached database
|
|
315
|
+
conn_name = model.model_config.get_connection()
|
|
316
|
+
table_name = model.model_config.get_table()
|
|
317
|
+
substitutions[model_name] = f"db_{conn_name}.{table_name}"
|
|
286
318
|
|
|
287
319
|
sql_query = parsed.transform(
|
|
288
|
-
lambda node: sqlglot.expressions.Table(this=substitutions[node.name])
|
|
320
|
+
lambda node: sqlglot.expressions.Table(this=substitutions[node.name], alias=node.alias)
|
|
289
321
|
if isinstance(node, sqlglot.expressions.Table) and node.name in substitutions
|
|
290
322
|
else node
|
|
291
323
|
).sql()
|
|
292
324
|
|
|
293
325
|
model_config = mc.FederateModelConfig(depends_on=dependencies)
|
|
294
|
-
query_file = mq.SqlQueryFile("", sql_query or "")
|
|
326
|
+
query_file = mq.SqlQueryFile("", sql_query or "SELECT 1")
|
|
295
327
|
fake_target_model = m.FederateModel(
|
|
296
328
|
"__fake_target", model_config, query_file, logger=self._logger, env_vars=self._env_vars, conn_set=self._conn_set, j2_env=self._j2_env
|
|
297
329
|
)
|
|
298
330
|
fake_target_model.is_target = True
|
|
299
|
-
dag = m.DAG(None, fake_target_model, models_dict, self.
|
|
331
|
+
dag = m.DAG(None, fake_target_model, models_dict, self._datalake_db_path, self._logger)
|
|
300
332
|
return dag
|
|
301
333
|
|
|
302
|
-
def
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
G = dag.to_networkx_graph()
|
|
309
|
-
|
|
310
|
-
fig, _ = plt.subplots()
|
|
311
|
-
pos = nx.multipartite_layout(G, subset_key="layer")
|
|
312
|
-
colors = [color_map[node[1]] for node in G.nodes(data="model_type")] # type: ignore
|
|
313
|
-
nx.draw(G, pos=pos, node_shape='^', node_size=1000, node_color=colors, arrowsize=20)
|
|
334
|
+
async def _get_compiled_dag(
    self, user: AbstractUser, *, sql_query: str | None = None, selections: dict[str, t.Any] | None = None,
    configurables: dict[str, str] | None = None, always_python_df: bool = False
) -> m.DAG:
    """
    Generate a DAG with a fake target model and execute it without running the queries.

    Arguments:
        user: The user on whose behalf the DAG is executed.
        sql_query: Optional SQL query used to define the fake target model. Default is None.
        selections: Parameter selections to apply. None (the default) means no selections.
        configurables: Overrides applied on top of the manifest's default configurables.
            None (the default) means no overrides.
        always_python_df: Forwarded to the model construction step. Default is False.

    Returns:
        The DAG after execution with runquery=False.
    """
    dag = self._generate_dag_with_fake_target(sql_query, always_python_df=always_python_df)

    # Fresh dict per call: a shared mutable default could leak state between calls
    # if anything downstream mutates the selections.
    if selections is None:
        selections = {}

    # Caller-supplied values override the defaults declared in the manifest
    configurables = {**self._manifest_cfg.get_default_configurables(), **(configurables or {})}
    await dag.execute(
        self._param_args, self._param_cfg_set, self._context_func, user, selections,
        runquery=False, configurables=configurables
    )
    return dag
|
|
331
346
|
|
|
332
347
|
def _get_all_connections(self) -> list[rm.ConnectionItemModel]:
|
|
@@ -347,7 +362,7 @@ class SquirrelsProject:
|
|
|
347
362
|
Returns:
|
|
348
363
|
A list of DataModelItem objects
|
|
349
364
|
"""
|
|
350
|
-
compiled_dag = await self._get_compiled_dag()
|
|
365
|
+
compiled_dag = await self._get_compiled_dag(self._admin_user)
|
|
351
366
|
return self._get_all_data_models(compiled_dag)
|
|
352
367
|
|
|
353
368
|
def _get_all_data_lineage(self, compiled_dag: m.DAG) -> list[rm.LineageRelation]:
|
|
@@ -376,130 +391,172 @@ class SquirrelsProject:
|
|
|
376
391
|
Returns:
|
|
377
392
|
A list of LineageRelation objects
|
|
378
393
|
"""
|
|
379
|
-
compiled_dag = await self._get_compiled_dag()
|
|
394
|
+
compiled_dag = await self._get_compiled_dag(self._admin_user)
|
|
380
395
|
return self._get_all_data_lineage(compiled_dag)
|
|
381
396
|
|
|
382
|
-
async def _write_dataset_outputs_given_test_set(
|
|
383
|
-
self, dataset: str, select: str, test_set: str | None, runquery: bool, recurse: bool
|
|
384
|
-
) -> t.Any | None:
|
|
385
|
-
dataset_conf = self._manifest_cfg.datasets[dataset]
|
|
386
|
-
default_test_set_conf = self._manifest_cfg.get_default_test_set(dataset)
|
|
387
|
-
if test_set in self._manifest_cfg.selection_test_sets:
|
|
388
|
-
test_set_conf = self._manifest_cfg.selection_test_sets[test_set]
|
|
389
|
-
elif test_set is None or test_set == default_test_set_conf.name:
|
|
390
|
-
test_set, test_set_conf = default_test_set_conf.name, default_test_set_conf
|
|
391
|
-
else:
|
|
392
|
-
raise ConfigurationError(f"No test set named '{test_set}' was found when compiling dataset '{dataset}'. The test set must be defined if not default for dataset.")
|
|
393
|
-
|
|
394
|
-
error_msg_intro = f"Cannot compile dataset '{dataset}' with test set '{test_set}'."
|
|
395
|
-
if test_set_conf.datasets is not None and dataset not in test_set_conf.datasets:
|
|
396
|
-
raise ConfigurationError(f"{error_msg_intro}\n Applicable datasets for test set '{test_set}' does not include dataset '{dataset}'.")
|
|
397
|
-
|
|
398
|
-
user_attributes = test_set_conf.user_attributes.copy() if test_set_conf.user_attributes is not None else {}
|
|
399
|
-
selections = test_set_conf.parameters.copy()
|
|
400
|
-
username, is_admin = user_attributes.pop("username", ""), user_attributes.pop("is_admin", False)
|
|
401
|
-
if test_set_conf.is_authenticated:
|
|
402
|
-
user = self._auth.User(username=username, is_admin=is_admin, **user_attributes)
|
|
403
|
-
elif dataset_conf.scope == mf.PermissionScope.PUBLIC:
|
|
404
|
-
user = None
|
|
405
|
-
else:
|
|
406
|
-
raise ConfigurationError(f"{error_msg_intro}\n Non-public datasets require a test set with 'user_attributes' section defined")
|
|
407
|
-
|
|
408
|
-
if dataset_conf.scope == mf.PermissionScope.PRIVATE and not is_admin:
|
|
409
|
-
raise ConfigurationError(f"{error_msg_intro}\n Private datasets require a test set with user_attribute 'is_admin' set to true")
|
|
410
|
-
|
|
411
|
-
# always_python_df is set to True for creating CSV files from results (when runquery is True)
|
|
412
|
-
dag = self._generate_dag(dataset, target_model_name=select, always_python_df=runquery)
|
|
413
|
-
await dag.execute(
|
|
414
|
-
self._param_args, self._param_cfg_set, self._context_func, user, selections,
|
|
415
|
-
runquery=runquery, recurse=recurse, default_traits=self._manifest_cfg.get_default_traits()
|
|
416
|
-
)
|
|
417
|
-
|
|
418
|
-
output_folder = Path(self._filepath, c.TARGET_FOLDER, c.COMPILE_FOLDER, dataset, test_set)
|
|
419
|
-
if output_folder.exists():
|
|
420
|
-
shutil.rmtree(output_folder)
|
|
421
|
-
output_folder.mkdir(parents=True, exist_ok=True)
|
|
422
|
-
|
|
423
|
-
def write_placeholders() -> None:
|
|
424
|
-
output_filepath = Path(output_folder, "placeholders.json")
|
|
425
|
-
with open(output_filepath, 'w') as f:
|
|
426
|
-
json.dump(dag.placeholders, f, indent=4)
|
|
427
|
-
|
|
428
|
-
def write_model_outputs(model: m.DataModel) -> None:
|
|
429
|
-
assert isinstance(model, m.QueryModel)
|
|
430
|
-
subfolder = c.DBVIEWS_FOLDER if model.model_type == m.ModelType.DBVIEW else c.FEDERATES_FOLDER
|
|
431
|
-
subpath = Path(output_folder, subfolder)
|
|
432
|
-
subpath.mkdir(parents=True, exist_ok=True)
|
|
433
|
-
if isinstance(model.compiled_query, mq.SqlModelQuery):
|
|
434
|
-
output_filepath = Path(subpath, model.name+'.sql')
|
|
435
|
-
query = model.compiled_query.query
|
|
436
|
-
with open(output_filepath, 'w') as f:
|
|
437
|
-
f.write(query)
|
|
438
|
-
if runquery and isinstance(model.result, pl.LazyFrame):
|
|
439
|
-
output_filepath = Path(subpath, model.name+'.csv')
|
|
440
|
-
model.result.collect().write_csv(output_filepath)
|
|
441
|
-
|
|
442
|
-
write_placeholders()
|
|
443
|
-
all_model_names = dag.get_all_query_models()
|
|
444
|
-
coroutines = [asyncio.to_thread(write_model_outputs, dag.models_dict[name]) for name in all_model_names]
|
|
445
|
-
await u.asyncio_gather(coroutines)
|
|
446
|
-
|
|
447
|
-
if recurse:
|
|
448
|
-
self._draw_dag(dag, output_folder)
|
|
449
|
-
|
|
450
|
-
if isinstance(dag.target_model, m.QueryModel) and dag.target_model.compiled_query is not None:
|
|
451
|
-
return dag.target_model.compiled_query.query
|
|
452
|
-
|
|
453
397
|
async def compile(
|
|
454
|
-
self, *,
|
|
455
|
-
|
|
398
|
+
self, *, selected_model: str | None = None, test_set: str | None = None, do_all_test_sets: bool = False,
|
|
399
|
+
runquery: bool = False, clear: bool = False, buildtime_only: bool = False, runtime_only: bool = False
|
|
456
400
|
) -> None:
|
|
457
401
|
"""
|
|
458
|
-
|
|
402
|
+
Compile models into the "target/compile" folder.
|
|
459
403
|
|
|
460
|
-
|
|
404
|
+
Behavior:
|
|
405
|
+
- Buildtime outputs: target/compile/buildtime/*.sql (for SQL build models) and dag.png
|
|
406
|
+
- Runtime outputs: target/compile/runtime/[test_set]/dbviews/*.sql, federates/*.sql, dag.png
|
|
407
|
+
If runquery=True, also write CSVs for runtime models.
|
|
408
|
+
- Options: clear entire compile folder first; compile only buildtime or only runtime.
|
|
461
409
|
|
|
462
410
|
Arguments:
|
|
463
|
-
dataset: The name of the dataset to compile. Ignored if "do_all_datasets" argument is True, but required (i.e., cannot be None) if "do_all_datasets" is False. Default is None.
|
|
464
|
-
do_all_datasets: If True, compile all datasets and ignore the "dataset" argument. Default is False.
|
|
465
411
|
selected_model: The name of the model to compile. If specified, the compiled SQL query is also printed in the terminal. If None, all models for the selected dataset are compiled. Default is None.
|
|
466
412
|
test_set: The name of the test set to compile with. If None, the default test set is used (which can vary by dataset). Ignored if `do_all_test_sets` argument is True. Default is None.
|
|
467
413
|
do_all_test_sets: Whether to compile all applicable test sets for the selected dataset(s). If True, the `test_set` argument is ignored. Default is False.
|
|
468
|
-
runquery
|
|
414
|
+
runquery: Whether to run all compiled queries and save each result as a CSV file. If True and `selected_model` is specified, all upstream models of the selected model is compiled as well. Default is False.
|
|
415
|
+
clear: Whether to clear the "target/compile/" folder before compiling. Default is False.
|
|
416
|
+
buildtime_only: Whether to compile only buildtime models. Default is False.
|
|
417
|
+
runtime_only: Whether to compile only runtime models. Default is False.
|
|
469
418
|
"""
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
419
|
+
border = "=" * 80
|
|
420
|
+
underlines = "-" * len(border)
|
|
421
|
+
|
|
422
|
+
compile_root = Path(self._filepath, c.TARGET_FOLDER, c.COMPILE_FOLDER)
|
|
423
|
+
if clear and compile_root.exists():
|
|
424
|
+
shutil.rmtree(compile_root)
|
|
425
|
+
|
|
426
|
+
models_dict = self._get_models_dict(always_python_df=False)
|
|
427
|
+
|
|
428
|
+
if selected_model is not None:
|
|
429
|
+
selected_model = u.normalize_name(selected_model)
|
|
430
|
+
if selected_model not in models_dict:
|
|
431
|
+
print(f"No such model found: {selected_model}")
|
|
432
|
+
return
|
|
433
|
+
if not isinstance(models_dict[selected_model], m.QueryModel):
|
|
434
|
+
print(f"Model '{selected_model}' is not a query model. Nothing to do.")
|
|
435
|
+
return
|
|
436
|
+
|
|
437
|
+
model_to_compile = None
|
|
438
|
+
|
|
439
|
+
# Buildtime compilation
|
|
440
|
+
if not runtime_only:
|
|
441
|
+
print(underlines)
|
|
442
|
+
print(f"Compiling buildtime models")
|
|
443
|
+
print(underlines)
|
|
444
|
+
|
|
445
|
+
buildtime_folder = Path(compile_root, c.COMPILE_BUILDTIME_FOLDER)
|
|
446
|
+
buildtime_folder.mkdir(parents=True, exist_ok=True)
|
|
447
|
+
|
|
448
|
+
def write_buildtime_model(model: m.DataModel, static_models: dict[str, m.StaticModel]) -> None:
|
|
449
|
+
if not isinstance(model, m.BuildModel):
|
|
450
|
+
return
|
|
451
|
+
|
|
452
|
+
model.compile_for_build(self._conn_args, static_models)
|
|
453
|
+
|
|
454
|
+
if isinstance(model.compiled_query, mq.SqlModelQuery):
|
|
455
|
+
out_path = Path(buildtime_folder, f"{model.name}.sql")
|
|
456
|
+
with open(out_path, 'w') as f:
|
|
457
|
+
f.write(model.compiled_query.query)
|
|
458
|
+
print(f"Successfully compiled build model: {model.name}")
|
|
459
|
+
elif isinstance(model.compiled_query, mq.PyModelQuery):
|
|
460
|
+
print(f"The build model '{model.name}' is in Python. Compilation for Python is not supported yet.")
|
|
461
|
+
|
|
462
|
+
static_models = self._get_static_models()
|
|
463
|
+
if selected_model is not None:
|
|
464
|
+
model_to_compile = models_dict[selected_model]
|
|
465
|
+
write_buildtime_model(model_to_compile, static_models)
|
|
478
466
|
else:
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
coroutines: list[t.Coroutine] = []
|
|
483
|
-
for dataset, selected_model in selected_models:
|
|
484
|
-
if do_all_test_sets:
|
|
485
|
-
for test_set_name in self._manifest_cfg.get_applicable_test_sets(dataset):
|
|
486
|
-
coroutine = self._write_dataset_outputs_given_test_set(dataset, selected_model, test_set_name, runquery, recurse)
|
|
487
|
-
coroutines.append(coroutine)
|
|
467
|
+
coros = [asyncio.to_thread(write_buildtime_model, m, static_models) for m in static_models.values()]
|
|
468
|
+
await u.asyncio_gather(coros)
|
|
488
469
|
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
queries = await u.asyncio_gather(coroutines)
|
|
470
|
+
print(underlines)
|
|
471
|
+
print()
|
|
493
472
|
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
473
|
+
# Runtime compilation
|
|
474
|
+
if not buildtime_only:
|
|
475
|
+
if do_all_test_sets:
|
|
476
|
+
test_set_names_set = set(self._manifest_cfg.selection_test_sets.keys())
|
|
477
|
+
test_set_names_set.add(c.DEFAULT_TEST_SET_NAME)
|
|
478
|
+
test_set_names = list(test_set_names_set)
|
|
479
|
+
else:
|
|
480
|
+
test_set_names = [test_set or c.DEFAULT_TEST_SET_NAME]
|
|
481
|
+
|
|
482
|
+
for ts_name in test_set_names:
|
|
483
|
+
print(underlines)
|
|
484
|
+
print(f"Compiling runtime models (test set '{ts_name}')")
|
|
485
|
+
print(underlines)
|
|
486
|
+
|
|
487
|
+
# Build user and selections from test set config if present
|
|
488
|
+
ts_conf = self._manifest_cfg.selection_test_sets.get(ts_name, self._manifest_cfg.get_default_test_set())
|
|
489
|
+
# Separate base fields from custom fields
|
|
490
|
+
access_level = ts_conf.user.access_level
|
|
491
|
+
custom_fields = self._auth.CustomUserFields(**ts_conf.user.custom_fields)
|
|
492
|
+
if access_level == "guest":
|
|
493
|
+
user = GuestUser(username="", custom_fields=custom_fields)
|
|
494
|
+
else:
|
|
495
|
+
user = RegisteredUser(username="", access_level=access_level, custom_fields=custom_fields)
|
|
496
|
+
|
|
497
|
+
# Generate DAG across all models. When runquery=True, force models to produce Python dataframes so CSVs can be written.
|
|
498
|
+
dag = await self._get_compiled_dag(
|
|
499
|
+
user=user, selections=ts_conf.parameters, configurables=ts_conf.configurables, always_python_df=runquery,
|
|
500
|
+
)
|
|
501
|
+
if runquery:
|
|
502
|
+
await dag._run_models()
|
|
503
|
+
|
|
504
|
+
# Prepare output folders
|
|
505
|
+
runtime_folder = Path(compile_root, c.COMPILE_RUNTIME_FOLDER, ts_name)
|
|
506
|
+
dbviews_folder = Path(runtime_folder, c.DBVIEWS_FOLDER)
|
|
507
|
+
federates_folder = Path(runtime_folder, c.FEDERATES_FOLDER)
|
|
508
|
+
dbviews_folder.mkdir(parents=True, exist_ok=True)
|
|
509
|
+
federates_folder.mkdir(parents=True, exist_ok=True)
|
|
510
|
+
with open(Path(runtime_folder, "placeholders.json"), "w") as f:
|
|
511
|
+
json.dump(dag.placeholders, f)
|
|
512
|
+
|
|
513
|
+
# Function to write runtime models
|
|
514
|
+
def write_runtime_model(model: m.DataModel) -> None:
|
|
515
|
+
if not isinstance(model, m.QueryModel):
|
|
516
|
+
return
|
|
517
|
+
|
|
518
|
+
if model.model_type not in (m.ModelType.DBVIEW, m.ModelType.FEDERATE):
|
|
519
|
+
return
|
|
520
|
+
|
|
521
|
+
subfolder = dbviews_folder if model.model_type == m.ModelType.DBVIEW else federates_folder
|
|
522
|
+
model_type = "dbview" if model.model_type == m.ModelType.DBVIEW else "federate"
|
|
523
|
+
|
|
524
|
+
if isinstance(model.compiled_query, mq.SqlModelQuery):
|
|
525
|
+
out_sql = Path(subfolder, f"{model.name}.sql")
|
|
526
|
+
with open(out_sql, 'w') as f:
|
|
527
|
+
f.write(model.compiled_query.query)
|
|
528
|
+
print(f"Successfully compiled {model_type} model: {model.name}")
|
|
529
|
+
elif isinstance(model.compiled_query, mq.PyModelQuery):
|
|
530
|
+
print(f"The {model_type} model '{model.name}' is in Python. Compilation for Python is not supported yet.")
|
|
531
|
+
|
|
532
|
+
if runquery and isinstance(model.result, pl.LazyFrame):
|
|
533
|
+
out_csv = Path(subfolder, f"{model.name}.csv")
|
|
534
|
+
model.result.collect().write_csv(out_csv)
|
|
535
|
+
print(f"Successfully created CSV for {model_type} model: {model.name}")
|
|
536
|
+
|
|
537
|
+
# If selected_model is provided for runtime, only emit that model's outputs
|
|
538
|
+
if selected_model is not None:
|
|
539
|
+
model_to_compile = dag.models_dict[selected_model]
|
|
540
|
+
write_runtime_model(model_to_compile)
|
|
541
|
+
else:
|
|
542
|
+
coros = [asyncio.to_thread(write_runtime_model, model) for model in dag.models_dict.values()]
|
|
543
|
+
await u.asyncio_gather(coros)
|
|
544
|
+
|
|
545
|
+
print(underlines)
|
|
546
|
+
print()
|
|
547
|
+
|
|
548
|
+
print(f"All compilations complete! See the '{c.TARGET_FOLDER}/{c.COMPILE_FOLDER}/' folder for results.")
|
|
549
|
+
if model_to_compile and isinstance(model_to_compile, m.QueryModel) and isinstance(model_to_compile.compiled_query, mq.SqlModelQuery):
|
|
550
|
+
print()
|
|
551
|
+
print(border)
|
|
552
|
+
print(f"Compiled SQL query for model '{model_to_compile.name}':")
|
|
553
|
+
print(underlines)
|
|
554
|
+
print(model_to_compile.compiled_query.query)
|
|
555
|
+
print(border)
|
|
498
556
|
print()
|
|
499
557
|
|
|
500
|
-
def _permission_error(self, user:
|
|
501
|
-
|
|
502
|
-
return InvalidInputError(403, f"Unauthorized access to {data_type}", f"User{username} does not have permission to access {scope} {data_type}: {data_name}")
|
|
558
|
+
def _permission_error(self, user: AbstractUser, data_type: str, data_name: str, scope: str) -> InvalidInputError:
|
|
559
|
+
return InvalidInputError(403, f"unauthorized_access_to_{data_type}", f"User '{user}' does not have permission to access {scope} {data_type}: {data_name}")
|
|
503
560
|
|
|
504
561
|
def seed(self, name: str) -> pl.LazyFrame:
|
|
505
562
|
"""
|
|
@@ -535,7 +592,8 @@ class SquirrelsProject:
|
|
|
535
592
|
)
|
|
536
593
|
|
|
537
594
|
async def dataset(
|
|
538
|
-
self, name: str, *, selections: dict[str, t.Any] = {}, user:
|
|
595
|
+
self, name: str, *, selections: dict[str, t.Any] = {}, user: AbstractUser | None = None, require_auth: bool = True,
|
|
596
|
+
configurables: dict[str, str] = {}
|
|
539
597
|
) -> dr.DatasetResult:
|
|
540
598
|
"""
|
|
541
599
|
Async method to retrieve a dataset as a DatasetResult object (with metadata) given parameter selections.
|
|
@@ -548,14 +606,17 @@ class SquirrelsProject:
|
|
|
548
606
|
Returns:
|
|
549
607
|
A DatasetResult object containing the dataset result (as a polars DataFrame), its description, and the column details.
|
|
550
608
|
"""
|
|
609
|
+
if user is None:
|
|
610
|
+
user = self._guest_user
|
|
611
|
+
|
|
551
612
|
scope = self._manifest_cfg.datasets[name].scope
|
|
552
613
|
if require_auth and not self._auth.can_user_access_scope(user, scope):
|
|
553
614
|
raise self._permission_error(user, "dataset", name, scope.name)
|
|
554
615
|
|
|
555
616
|
dag = self._generate_dag(name)
|
|
617
|
+
configurables = {**self._manifest_cfg.get_default_configurables(name), **configurables}
|
|
556
618
|
await dag.execute(
|
|
557
|
-
self._param_args, self._param_cfg_set, self._context_func, user, dict(selections),
|
|
558
|
-
default_traits=self._manifest_cfg.get_default_traits()
|
|
619
|
+
self._param_args, self._param_cfg_set, self._context_func, user, dict(selections), configurables=configurables
|
|
559
620
|
)
|
|
560
621
|
assert isinstance(dag.target_model.result, pl.LazyFrame)
|
|
561
622
|
return dr.DatasetResult(
|
|
@@ -564,7 +625,8 @@ class SquirrelsProject:
|
|
|
564
625
|
)
|
|
565
626
|
|
|
566
627
|
async def dashboard(
|
|
567
|
-
self, name: str, *, selections: dict[str, t.Any] = {}, user:
|
|
628
|
+
self, name: str, *, selections: dict[str, t.Any] = {}, user: AbstractUser | None = None, dashboard_type: t.Type[T] = d.PngDashboard,
|
|
629
|
+
configurables: dict[str, str] = {}
|
|
568
630
|
) -> T:
|
|
569
631
|
"""
|
|
570
632
|
Async method to retrieve a dashboard given parameter selections.
|
|
@@ -578,13 +640,18 @@ class SquirrelsProject:
|
|
|
578
640
|
Returns:
|
|
579
641
|
The dashboard type specified by the "dashboard_type" argument.
|
|
580
642
|
"""
|
|
643
|
+
if user is None:
|
|
644
|
+
user = self._guest_user
|
|
645
|
+
|
|
581
646
|
scope = self._dashboards[name].config.scope
|
|
582
647
|
if not self._auth.can_user_access_scope(user, scope):
|
|
583
648
|
raise self._permission_error(user, "dashboard", name, scope.name)
|
|
584
649
|
|
|
585
650
|
async def get_dataset_df(dataset_name: str, fixed_params: dict[str, t.Any]) -> pl.DataFrame:
|
|
586
651
|
final_selections = {**selections, **fixed_params}
|
|
587
|
-
result = await self.dataset(
|
|
652
|
+
result = await self.dataset(
|
|
653
|
+
dataset_name, selections=final_selections, user=user, require_auth=False, configurables=configurables
|
|
654
|
+
)
|
|
588
655
|
return result.df
|
|
589
656
|
|
|
590
657
|
args = d.DashboardArgs(self._param_args, get_dataset_df)
|
|
@@ -594,12 +661,62 @@ class SquirrelsProject:
|
|
|
594
661
|
raise KeyError(f"No dashboard file found for: {name}")
|
|
595
662
|
|
|
596
663
|
async def query_models(
|
|
597
|
-
self, sql_query: str, *, selections: dict[str, t.Any] = {},
|
|
664
|
+
self, sql_query: str, *, user: AbstractUser | None = None, selections: dict[str, t.Any] = {}, configurables: dict[str, str] = {}
|
|
598
665
|
) -> dr.DatasetResult:
|
|
599
|
-
|
|
666
|
+
if user is None:
|
|
667
|
+
user = self._guest_user
|
|
668
|
+
|
|
669
|
+
dag = await self._get_compiled_dag(user=user, sql_query=sql_query, selections=selections, configurables=configurables)
|
|
600
670
|
await dag._run_models()
|
|
601
671
|
assert isinstance(dag.target_model.result, pl.LazyFrame)
|
|
602
672
|
return dr.DatasetResult(
|
|
603
673
|
target_model_config=dag.target_model.model_config,
|
|
604
674
|
df=dag.target_model.result.collect().with_row_index("_row_num", offset=1)
|
|
605
675
|
)
|
|
676
|
+
|
|
677
|
+
async def get_compiled_model_query(
|
|
678
|
+
self, model_name: str, *, user: AbstractUser | None = None, selections: dict[str, t.Any] = {}, configurables: dict[str, str] = {}
|
|
679
|
+
) -> rm.CompiledQueryModel:
|
|
680
|
+
"""
|
|
681
|
+
Compile the specified data model and return its language and compiled definition.
|
|
682
|
+
"""
|
|
683
|
+
if user is None:
|
|
684
|
+
user = self._guest_user
|
|
685
|
+
|
|
686
|
+
name = u.normalize_name(model_name)
|
|
687
|
+
models_dict = self._get_models_dict(always_python_df=False)
|
|
688
|
+
if name not in models_dict:
|
|
689
|
+
raise InvalidInputError(404, "model_not_found", f"No data model found with name: {model_name}")
|
|
690
|
+
|
|
691
|
+
model = models_dict[name]
|
|
692
|
+
# Only build, dbview, and federate models support runtime compiled definition in this context
|
|
693
|
+
if not isinstance(model, (m.BuildModel, m.DbviewModel, m.FederateModel)):
|
|
694
|
+
raise InvalidInputError(400, "unsupported_model_type", "Only build, dbview, and federate models currently support compiled definition via this endpoint")
|
|
695
|
+
|
|
696
|
+
# Build a DAG with this model as the target, without a dataset context
|
|
697
|
+
model.is_target = True
|
|
698
|
+
dag = m.DAG(None, model, models_dict, self._datalake_db_path, self._logger)
|
|
699
|
+
|
|
700
|
+
cfg = {**self._manifest_cfg.get_default_configurables(), **configurables}
|
|
701
|
+
await dag.execute(
|
|
702
|
+
self._param_args, self._param_cfg_set, self._context_func, user, selections, runquery=False, configurables=cfg
|
|
703
|
+
)
|
|
704
|
+
|
|
705
|
+
language = "sql" if isinstance(model.query_file, mq.SqlQueryFile) else "python"
|
|
706
|
+
if isinstance(model, m.BuildModel):
|
|
707
|
+
# Compile SQL build models; Python build models not yet supported
|
|
708
|
+
if isinstance(model.query_file, mq.SqlQueryFile):
|
|
709
|
+
static_models = self._get_static_models()
|
|
710
|
+
compiled = model._compile_sql_model(model.query_file, self._conn_args, static_models)
|
|
711
|
+
definition = compiled.query
|
|
712
|
+
else:
|
|
713
|
+
definition = "# Compiling Python build models is currently not supported. This will be available in a future version of Squirrels..."
|
|
714
|
+
elif isinstance(model.compiled_query, mq.SqlModelQuery):
|
|
715
|
+
definition = model.compiled_query.query
|
|
716
|
+
elif isinstance(model.compiled_query, mq.PyModelQuery):
|
|
717
|
+
definition = "# Compiling Python data models is currently not supported. This will be available in a future version of Squirrels..."
|
|
718
|
+
else:
|
|
719
|
+
raise NotImplementedError(f"Query type not supported: {model.compiled_query.__class__.__name__}")
|
|
720
|
+
|
|
721
|
+
return rm.CompiledQueryModel(language=language, definition=definition, placeholders=dag.placeholders)
|
|
722
|
+
|