squirrels 0.5.0b3__py3-none-any.whl → 0.6.0.post0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- squirrels/__init__.py +4 -0
- squirrels/_api_routes/__init__.py +5 -0
- squirrels/_api_routes/auth.py +337 -0
- squirrels/_api_routes/base.py +196 -0
- squirrels/_api_routes/dashboards.py +156 -0
- squirrels/_api_routes/data_management.py +148 -0
- squirrels/_api_routes/datasets.py +220 -0
- squirrels/_api_routes/project.py +289 -0
- squirrels/_api_server.py +440 -792
- squirrels/_arguments/__init__.py +0 -0
- squirrels/_arguments/{_init_time_args.py → init_time_args.py} +23 -43
- squirrels/_arguments/{_run_time_args.py → run_time_args.py} +32 -68
- squirrels/_auth.py +590 -264
- squirrels/_command_line.py +130 -58
- squirrels/_compile_prompts.py +147 -0
- squirrels/_connection_set.py +16 -15
- squirrels/_constants.py +36 -11
- squirrels/_dashboards.py +179 -0
- squirrels/_data_sources.py +40 -34
- squirrels/_dataset_types.py +16 -11
- squirrels/_env_vars.py +209 -0
- squirrels/_exceptions.py +9 -37
- squirrels/_http_error_responses.py +52 -0
- squirrels/_initializer.py +7 -6
- squirrels/_logging.py +121 -0
- squirrels/_manifest.py +155 -77
- squirrels/_mcp_server.py +578 -0
- squirrels/_model_builder.py +11 -55
- squirrels/_model_configs.py +5 -5
- squirrels/_model_queries.py +1 -1
- squirrels/_models.py +276 -143
- squirrels/_package_data/base_project/.env +1 -24
- squirrels/_package_data/base_project/.env.example +31 -17
- squirrels/_package_data/base_project/connections.yml +4 -3
- squirrels/_package_data/base_project/dashboards/dashboard_example.py +13 -7
- squirrels/_package_data/base_project/dashboards/dashboard_example.yml +6 -6
- squirrels/_package_data/base_project/docker/Dockerfile +2 -2
- squirrels/_package_data/base_project/docker/compose.yml +1 -1
- squirrels/_package_data/base_project/duckdb_init.sql +1 -0
- squirrels/_package_data/base_project/models/builds/build_example.py +2 -2
- squirrels/_package_data/base_project/models/dbviews/dbview_example.sql +7 -2
- squirrels/_package_data/base_project/models/dbviews/dbview_example.yml +16 -10
- squirrels/_package_data/base_project/models/federates/federate_example.py +27 -17
- squirrels/_package_data/base_project/models/federates/federate_example.sql +3 -7
- squirrels/_package_data/base_project/models/federates/federate_example.yml +7 -7
- squirrels/_package_data/base_project/models/sources.yml +5 -6
- squirrels/_package_data/base_project/parameters.yml +24 -38
- squirrels/_package_data/base_project/pyconfigs/connections.py +8 -3
- squirrels/_package_data/base_project/pyconfigs/context.py +26 -14
- squirrels/_package_data/base_project/pyconfigs/parameters.py +124 -81
- squirrels/_package_data/base_project/pyconfigs/user.py +48 -15
- squirrels/_package_data/base_project/resources/public/.gitkeep +0 -0
- squirrels/_package_data/base_project/seeds/seed_categories.yml +1 -1
- squirrels/_package_data/base_project/seeds/seed_subcategories.yml +1 -1
- squirrels/_package_data/base_project/squirrels.yml.j2 +21 -31
- squirrels/_package_data/templates/login_successful.html +53 -0
- squirrels/_package_data/templates/squirrels_studio.html +22 -0
- squirrels/_parameter_configs.py +43 -22
- squirrels/_parameter_options.py +1 -1
- squirrels/_parameter_sets.py +41 -30
- squirrels/_parameters.py +560 -123
- squirrels/_project.py +487 -277
- squirrels/_py_module.py +71 -10
- squirrels/_request_context.py +33 -0
- squirrels/_schemas/__init__.py +0 -0
- squirrels/_schemas/auth_models.py +83 -0
- squirrels/_schemas/query_param_models.py +70 -0
- squirrels/_schemas/request_models.py +26 -0
- squirrels/_schemas/response_models.py +286 -0
- squirrels/_seeds.py +52 -13
- squirrels/_sources.py +29 -23
- squirrels/_utils.py +221 -42
- squirrels/_version.py +1 -3
- squirrels/arguments.py +7 -2
- squirrels/auth.py +4 -0
- squirrels/connections.py +2 -0
- squirrels/dashboards.py +3 -1
- squirrels/data_sources.py +6 -0
- squirrels/parameter_options.py +5 -0
- squirrels/parameters.py +5 -0
- squirrels/types.py +10 -3
- squirrels-0.6.0.post0.dist-info/METADATA +148 -0
- squirrels-0.6.0.post0.dist-info/RECORD +101 -0
- {squirrels-0.5.0b3.dist-info → squirrels-0.6.0.post0.dist-info}/WHEEL +1 -1
- squirrels/_api_response_models.py +0 -190
- squirrels/_dashboard_types.py +0 -82
- squirrels/_dashboards_io.py +0 -79
- squirrels-0.5.0b3.dist-info/METADATA +0 -110
- squirrels-0.5.0b3.dist-info/RECORD +0 -80
- /squirrels/_package_data/base_project/{assets → resources}/expenses.db +0 -0
- /squirrels/_package_data/base_project/{assets → resources}/weather.db +0 -0
- {squirrels-0.5.0b3.dist-info → squirrels-0.6.0.post0.dist-info}/entry_points.txt +0 -0
- {squirrels-0.5.0b3.dist-info → squirrels-0.6.0.post0.dist-info}/licenses/LICENSE +0 -0
squirrels/_models.py
CHANGED
|
@@ -5,25 +5,27 @@ from abc import ABCMeta, abstractmethod
|
|
|
5
5
|
from enum import Enum
|
|
6
6
|
from pathlib import Path
|
|
7
7
|
import asyncio, os, re, time, duckdb, sqlglot
|
|
8
|
-
import polars as pl, pandas as pd
|
|
8
|
+
import polars as pl, pandas as pd
|
|
9
9
|
|
|
10
|
-
from . import _constants as c, _utils as u, _py_module as pm, _model_queries as mq, _model_configs as mc, _sources as src
|
|
10
|
+
from . import _constants as c, _utils as u, _py_module as pm, _model_queries as mq, _model_configs as mc, _sources as src
|
|
11
|
+
from ._schemas import response_models as rm
|
|
11
12
|
from ._exceptions import FileExecutionError, InvalidInputError
|
|
12
|
-
from ._arguments.
|
|
13
|
-
from ._auth import
|
|
13
|
+
from ._arguments.run_time_args import ContextArgs, ModelArgs, BuildModelArgs
|
|
14
|
+
from ._auth import AbstractUser
|
|
14
15
|
from ._connection_set import ConnectionsArgs, ConnectionSet, ConnectionProperties
|
|
15
|
-
from ._manifest import DatasetConfig
|
|
16
|
+
from ._manifest import DatasetConfig, ConnectionTypeEnum
|
|
16
17
|
from ._parameter_sets import ParameterConfigsSet, ParametersArgs, ParameterSet
|
|
18
|
+
from ._env_vars import SquirrelsEnvVars
|
|
17
19
|
|
|
18
20
|
ContextFunc = Callable[[dict[str, Any], ContextArgs], None]
|
|
19
21
|
|
|
20
22
|
|
|
21
23
|
class ModelType(Enum):
|
|
24
|
+
SEED = "seed"
|
|
22
25
|
SOURCE = "source"
|
|
26
|
+
BUILD = "build"
|
|
23
27
|
DBVIEW = "dbview"
|
|
24
28
|
FEDERATE = "federate"
|
|
25
|
-
SEED = "seed"
|
|
26
|
-
BUILD = "build"
|
|
27
29
|
|
|
28
30
|
|
|
29
31
|
@dataclass
|
|
@@ -42,7 +44,6 @@ class DataModel(metaclass=ABCMeta):
|
|
|
42
44
|
|
|
43
45
|
_: KW_ONLY
|
|
44
46
|
logger: u.Logger = field(default_factory=lambda: u.Logger(""))
|
|
45
|
-
env_vars: dict[str, str] = field(default_factory=dict)
|
|
46
47
|
conn_set: ConnectionSet = field(default_factory=ConnectionSet)
|
|
47
48
|
|
|
48
49
|
@property
|
|
@@ -78,15 +79,15 @@ class DataModel(metaclass=ABCMeta):
|
|
|
78
79
|
self.confirmed_no_cycles = True
|
|
79
80
|
return terminal_nodes
|
|
80
81
|
|
|
81
|
-
def _load_duckdb_view_to_python_df(self, conn: duckdb.DuckDBPyConnection, *,
|
|
82
|
-
table_name = ("
|
|
82
|
+
def _load_duckdb_view_to_python_df(self, conn: duckdb.DuckDBPyConnection, *, use_datalake: bool = False) -> pl.LazyFrame:
|
|
83
|
+
table_name = ("vdl." if use_datalake else "") + self.name
|
|
83
84
|
try:
|
|
84
85
|
return conn.sql(f"FROM {table_name}").pl().lazy()
|
|
85
86
|
except duckdb.CatalogException as e:
|
|
86
87
|
raise u.ConfigurationError(f'Failed to load duckdb table or view "{self.name}" to python dataframe') from e
|
|
87
88
|
|
|
88
89
|
def _run_sql_query_on_connection(self, connection_name: str, query: str, placeholders: dict = {}) -> pl.DataFrame:
|
|
89
|
-
self.logger.
|
|
90
|
+
self.logger.debug(f"Running SQL query on connection '{connection_name}':\n{query}")
|
|
90
91
|
return self.conn_set.run_sql_query_from_conn_name(query, connection_name, placeholders)
|
|
91
92
|
|
|
92
93
|
async def _trigger(self, conn: duckdb.DuckDBPyConnection, placeholders: dict = {}) -> None:
|
|
@@ -132,11 +133,13 @@ class DataModel(metaclass=ABCMeta):
|
|
|
132
133
|
|
|
133
134
|
def _create_table_from_df(self, conn: duckdb.DuckDBPyConnection, query_result: pl.LazyFrame | pd.DataFrame):
|
|
134
135
|
local_conn = conn.cursor()
|
|
136
|
+
# local_conn = conn
|
|
135
137
|
try:
|
|
136
|
-
|
|
137
|
-
local_conn.execute(f"CREATE OR REPLACE TABLE {self.name} AS
|
|
138
|
+
assert query_result is not None
|
|
139
|
+
local_conn.execute(f"CREATE OR REPLACE TABLE {self.name} AS FROM query_result")
|
|
138
140
|
finally:
|
|
139
141
|
local_conn.close()
|
|
142
|
+
# pass
|
|
140
143
|
|
|
141
144
|
def process_pass_through_columns(self, models_dict: dict[str, DataModel]) -> None:
|
|
142
145
|
pass
|
|
@@ -171,19 +174,26 @@ class StaticModel(DataModel):
|
|
|
171
174
|
def _get_result(self, conn: duckdb.DuckDBPyConnection) -> pl.LazyFrame:
|
|
172
175
|
local_conn = conn.cursor()
|
|
173
176
|
try:
|
|
174
|
-
return self._load_duckdb_view_to_python_df(local_conn,
|
|
177
|
+
return self._load_duckdb_view_to_python_df(local_conn, use_datalake=True)
|
|
175
178
|
except Exception as e:
|
|
176
|
-
raise InvalidInputError(
|
|
179
|
+
raise InvalidInputError(409, f'dependent_data_model_not_found', f'Model "{self.name}" depends on static data models that cannot be found. Try building the Virtual Data Lake (VDL) first.')
|
|
177
180
|
finally:
|
|
178
181
|
local_conn.close()
|
|
179
182
|
|
|
180
183
|
async def run_model(self, conn: duckdb.DuckDBPyConnection, placeholders: dict = {}) -> None:
|
|
181
|
-
start = time.time()
|
|
182
|
-
|
|
183
184
|
if (self.needs_python_df or self.is_target) and self.result is None:
|
|
185
|
+
start = time.time()
|
|
186
|
+
|
|
184
187
|
self.result = await asyncio.to_thread(self._get_result, conn)
|
|
185
|
-
|
|
186
|
-
|
|
188
|
+
|
|
189
|
+
self.logger.log_activity_time(
|
|
190
|
+
f"loading {self.model_type.value} model '{self.name}' into memory", start,
|
|
191
|
+
additional_data={
|
|
192
|
+
"activity": "loading static data model into memory",
|
|
193
|
+
"model_name": self.name,
|
|
194
|
+
"model_type": self.model_type.value
|
|
195
|
+
}
|
|
196
|
+
)
|
|
187
197
|
|
|
188
198
|
await super().run_model(conn, placeholders)
|
|
189
199
|
|
|
@@ -224,10 +234,18 @@ class Seed(StaticModel):
|
|
|
224
234
|
start = time.time()
|
|
225
235
|
|
|
226
236
|
print(f"[{u.get_current_time()}] 🔨 BUILDING: seed model '{self.name}'")
|
|
227
|
-
await asyncio.to_thread(self._create_table_from_df, conn, self.result)
|
|
237
|
+
# await asyncio.to_thread(self._create_table_from_df, conn, self.result)
|
|
238
|
+
self._create_table_from_df(conn, self.result) # without threading
|
|
228
239
|
|
|
229
240
|
print(f"[{u.get_current_time()}] ✅ FINISHED: seed model '{self.name}'")
|
|
230
|
-
self.logger.log_activity_time(
|
|
241
|
+
self.logger.log_activity_time(
|
|
242
|
+
f"building seed model '{self.name}' into VDL", start,
|
|
243
|
+
additional_data={
|
|
244
|
+
"activity": "building data model into VDL",
|
|
245
|
+
"model_name": self.name,
|
|
246
|
+
"model_type": self.model_type.value
|
|
247
|
+
}
|
|
248
|
+
)
|
|
231
249
|
|
|
232
250
|
await super().build_model(conn, full_refresh)
|
|
233
251
|
|
|
@@ -239,24 +257,32 @@ class SourceModel(StaticModel):
|
|
|
239
257
|
@property
|
|
240
258
|
def model_type(self) -> ModelType:
|
|
241
259
|
return ModelType.SOURCE
|
|
260
|
+
|
|
261
|
+
@property
|
|
262
|
+
def connection_props(self) -> ConnectionProperties:
|
|
263
|
+
conn_name = self.model_config.get_connection()
|
|
264
|
+
conn_props = self.conn_set.get_connection(conn_name)
|
|
265
|
+
if isinstance(conn_props, ConnectionProperties):
|
|
266
|
+
return conn_props
|
|
267
|
+
raise u.ConfigurationError(f'Unable to use connection "{conn_name}" for source "{self.name}". Connection "{conn_name}" must be a ConnectionProperties object')
|
|
242
268
|
|
|
243
269
|
@property
|
|
244
270
|
def is_queryable(self) -> bool:
|
|
245
|
-
|
|
271
|
+
connection_props = self.connection_props
|
|
272
|
+
return self.model_config.load_to_vdl or connection_props.type == ConnectionTypeEnum.DUCKDB
|
|
246
273
|
|
|
247
274
|
def _build_source_model(self, conn: duckdb.DuckDBPyConnection, full_refresh: bool) -> None:
|
|
248
275
|
local_conn = conn.cursor()
|
|
276
|
+
# local_conn = conn
|
|
277
|
+
|
|
278
|
+
local_conn.begin()
|
|
249
279
|
try:
|
|
250
280
|
source = self.model_config
|
|
251
281
|
conn_name = source.get_connection()
|
|
252
282
|
|
|
253
|
-
connection_props = self.
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
attach_uri = connection_props.attach_uri_for_duckdb
|
|
257
|
-
else:
|
|
258
|
-
raise u.ConfigurationError(f'Unable to use connection "{conn_name}" for source "{self.name}". Connection "{conn_name}" must be a ConnectionProperties object')
|
|
259
|
-
|
|
283
|
+
connection_props = self.connection_props
|
|
284
|
+
dialect = connection_props.dialect
|
|
285
|
+
attach_uri = connection_props.attach_uri_for_duckdb
|
|
260
286
|
if attach_uri is None:
|
|
261
287
|
raise u.ConfigurationError(f'Loading to duckdb is not supported for source "{self.name}" since its connection "{conn_name}" uses an unsupported dialect')
|
|
262
288
|
|
|
@@ -268,8 +294,9 @@ class SourceModel(StaticModel):
|
|
|
268
294
|
new_table_name = self.name
|
|
269
295
|
|
|
270
296
|
if len(source.columns) == 0:
|
|
271
|
-
stmt = f"CREATE OR REPLACE TABLE {new_table_name} AS
|
|
297
|
+
stmt = f"CREATE OR REPLACE TABLE {new_table_name} AS FROM db_{conn_name}.{table_name}"
|
|
272
298
|
u.run_duckdb_stmt(self.logger, local_conn, stmt)
|
|
299
|
+
local_conn.commit()
|
|
273
300
|
return
|
|
274
301
|
|
|
275
302
|
increasing_column = source.update_hints.increasing_column
|
|
@@ -296,25 +323,44 @@ class SourceModel(StaticModel):
|
|
|
296
323
|
if max_val_of_incr_col is None:
|
|
297
324
|
recreate_table = True
|
|
298
325
|
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
326
|
+
query = source.get_query_for_upsert(dialect, conn_name, table_name, max_val_of_incr_col, full_refresh=recreate_table)
|
|
327
|
+
|
|
328
|
+
primary_keys = ", ".join(source.primary_key) if source.primary_key else ""
|
|
329
|
+
match_condition = f"USING ({primary_keys})" if primary_keys else "ON false"
|
|
330
|
+
stmt = (
|
|
331
|
+
f"MERGE INTO {new_table_name} "
|
|
332
|
+
f"USING ({query}) AS src "
|
|
333
|
+
f"{match_condition} "
|
|
334
|
+
f"WHEN MATCHED THEN UPDATE "
|
|
335
|
+
f"WHEN NOT MATCHED THEN INSERT BY NAME"
|
|
336
|
+
)
|
|
303
337
|
u.run_duckdb_stmt(self.logger, local_conn, stmt)
|
|
338
|
+
|
|
339
|
+
local_conn.commit()
|
|
340
|
+
|
|
304
341
|
finally:
|
|
305
342
|
local_conn.close()
|
|
343
|
+
# pass
|
|
306
344
|
|
|
307
345
|
async def build_model(self, conn: duckdb.DuckDBPyConnection, full_refresh: bool) -> None:
|
|
308
|
-
if self.model_config.
|
|
346
|
+
if self.model_config.load_to_vdl:
|
|
309
347
|
start = time.time()
|
|
310
348
|
print(f"[{u.get_current_time()}] 🔨 BUILDING: source model '{self.name}'")
|
|
311
349
|
|
|
312
|
-
await asyncio.to_thread(self._build_source_model, conn, full_refresh)
|
|
350
|
+
# await asyncio.to_thread(self._build_source_model, conn, full_refresh)
|
|
351
|
+
self._build_source_model(conn, full_refresh) # without threading
|
|
313
352
|
|
|
314
353
|
print(f"[{u.get_current_time()}] ✅ FINISHED: source model '{self.name}'")
|
|
315
|
-
self.logger.log_activity_time(
|
|
354
|
+
self.logger.log_activity_time(
|
|
355
|
+
f"building source model '{self.name}' into VDL", start,
|
|
356
|
+
additional_data={
|
|
357
|
+
"activity": "building data model into VDL",
|
|
358
|
+
"model_name": self.name,
|
|
359
|
+
"model_type": self.model_type.value
|
|
360
|
+
}
|
|
361
|
+
)
|
|
316
362
|
|
|
317
|
-
|
|
363
|
+
await super().build_model(conn, full_refresh)
|
|
318
364
|
|
|
319
365
|
|
|
320
366
|
@dataclass
|
|
@@ -337,10 +383,16 @@ class QueryModel(DataModel):
|
|
|
337
383
|
raise u.ConfigurationError(f'Model "{self.name}" references unknown model "{dependent_model_name}"')
|
|
338
384
|
|
|
339
385
|
dep_model = models_dict[dependent_model_name]
|
|
340
|
-
if isinstance(dep_model, SourceModel) and not dep_model.model_config.
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
)
|
|
386
|
+
if isinstance(dep_model, SourceModel) and not dep_model.model_config.load_to_vdl:
|
|
387
|
+
# Allow when caller is Build or Federate AND the source connection is duckdb; else error
|
|
388
|
+
conn_name = dep_model.model_config.get_connection()
|
|
389
|
+
conn_props = self.conn_set.get_connection(conn_name)
|
|
390
|
+
is_duckdb_conn = isinstance(conn_props, ConnectionProperties) and conn_props.type == ConnectionTypeEnum.DUCKDB
|
|
391
|
+
if not is_duckdb_conn:
|
|
392
|
+
raise u.ConfigurationError(
|
|
393
|
+
f'Model "{self.name}" cannot reference source model "{dependent_model_name}". '
|
|
394
|
+
'To be referenced by a build or federate model, the source must have load_to_vdl=True or a duckdb connection type.'
|
|
395
|
+
)
|
|
344
396
|
|
|
345
397
|
self.model_config.depends_on.add(dependent_model_name)
|
|
346
398
|
return dependent_model_name
|
|
@@ -355,10 +407,10 @@ class QueryModel(DataModel):
|
|
|
355
407
|
def _get_compile_sql_model_args_from_ctx_args(
|
|
356
408
|
self, ctx: dict[str, Any], ctx_args: ContextArgs
|
|
357
409
|
) -> dict[str, Any]:
|
|
358
|
-
is_placeholder = lambda placeholder: placeholder in ctx_args.
|
|
410
|
+
is_placeholder = lambda placeholder: placeholder in ctx_args._placeholders
|
|
359
411
|
kwargs = {
|
|
360
412
|
"proj_vars": ctx_args.proj_vars, "env_vars": ctx_args.env_vars, "user": ctx_args.user, "prms": ctx_args.prms,
|
|
361
|
-
"
|
|
413
|
+
"configurables": ctx_args.configurables, "ctx": ctx, "is_placeholder": is_placeholder, "set_placeholder": ctx_args.set_placeholder,
|
|
362
414
|
"param_exists": ctx_args.param_exists
|
|
363
415
|
}
|
|
364
416
|
return kwargs
|
|
@@ -417,7 +469,7 @@ class QueryModel(DataModel):
|
|
|
417
469
|
|
|
418
470
|
# Copy metadata from upstream column
|
|
419
471
|
col.type = upstream_col.type if col.type == "" else col.type
|
|
420
|
-
col.condition = upstream_col.condition if col.condition ==
|
|
472
|
+
col.condition = upstream_col.condition if col.condition == [] else col.condition
|
|
421
473
|
col.description = upstream_col.description if col.description == "" else col.description
|
|
422
474
|
col.category = upstream_col.category if col.category == mc.ColumnCategory.MISC else col.category
|
|
423
475
|
|
|
@@ -432,7 +484,7 @@ class QueryModel(DataModel):
|
|
|
432
484
|
def _log_sql_to_run(self, sql: str, placeholders: dict[str, Any]) -> None:
|
|
433
485
|
log_msg = f"SQL to run for model '{self.name}':\n{sql}"
|
|
434
486
|
log_msg += f"\n\n(with placeholders: {placeholders})"
|
|
435
|
-
self.logger.
|
|
487
|
+
self.logger.debug(log_msg)
|
|
436
488
|
|
|
437
489
|
|
|
438
490
|
@dataclass
|
|
@@ -457,11 +509,11 @@ class DbviewModel(QueryModel):
|
|
|
457
509
|
if source_model.model_config.get_connection() != self.model_config.get_connection():
|
|
458
510
|
raise u.ConfigurationError(f'Dbview "{self.name}" references source "{source_name}" with different connection')
|
|
459
511
|
|
|
460
|
-
# Check if the source model has
|
|
461
|
-
if not source_model.model_config.
|
|
512
|
+
# Check if the source model has load_to_vdl=False but this dbview has translate_to_duckdb=True
|
|
513
|
+
if not source_model.model_config.load_to_vdl and self.model_config.translate_to_duckdb:
|
|
462
514
|
raise u.ConfigurationError(
|
|
463
515
|
f'Dbview "{self.name}" with translate_to_duckdb=True cannot reference source "{source_name}" '
|
|
464
|
-
f'which has
|
|
516
|
+
f'which has load_to_vdl=False'
|
|
465
517
|
)
|
|
466
518
|
|
|
467
519
|
self.model_config.depends_on.add(source_name)
|
|
@@ -474,10 +526,11 @@ class DbviewModel(QueryModel):
|
|
|
474
526
|
|
|
475
527
|
def _get_duckdb_query(self, read_dialect: str, query: str) -> str:
|
|
476
528
|
kwargs = {
|
|
477
|
-
"source": lambda source_name: "
|
|
529
|
+
"source": lambda source_name: "vdl." + source_name
|
|
478
530
|
}
|
|
479
531
|
compiled_query = self._get_compiled_sql_query_str(query, kwargs)
|
|
480
|
-
|
|
532
|
+
duckdb_query = sqlglot.transpile(compiled_query, read=read_dialect, write="duckdb", pretty=True)[0]
|
|
533
|
+
return "-- translated to duckdb\n" + duckdb_query
|
|
481
534
|
|
|
482
535
|
def _compile_sql_model(self, kwargs: dict[str, Any]) -> mq.SqlModelQuery:
|
|
483
536
|
compiled_query_str = self._get_compiled_sql_query_str(self.query_file.raw_query, kwargs)
|
|
@@ -486,15 +539,20 @@ class DbviewModel(QueryModel):
|
|
|
486
539
|
connection_props = self.conn_set.get_connection(connection_name)
|
|
487
540
|
|
|
488
541
|
if self.model_config.translate_to_duckdb and isinstance(connection_props, ConnectionProperties):
|
|
489
|
-
|
|
490
|
-
|
|
542
|
+
# Forbid translate_to_duckdb when dbview connection is duckdb
|
|
543
|
+
if connection_props.type == ConnectionTypeEnum.DUCKDB:
|
|
544
|
+
raise u.ConfigurationError(
|
|
545
|
+
f'Dbview "{self.name}" has translate_to_duckdb=True but its connection is duckdb. Use a federate model instead.'
|
|
546
|
+
)
|
|
547
|
+
macros = {
|
|
548
|
+
"source": lambda source_name: "vdl." + source_name
|
|
491
549
|
}
|
|
492
550
|
compiled_query2 = self._get_compiled_sql_query_str(compiled_query_str, macros)
|
|
493
551
|
compiled_query_str = self._get_duckdb_query(connection_props.dialect, compiled_query2)
|
|
494
552
|
is_duckdb = True
|
|
495
553
|
else:
|
|
496
|
-
macros = {
|
|
497
|
-
"source": lambda source_name: self.sources[source_name].get_table()
|
|
554
|
+
macros = {
|
|
555
|
+
"source": lambda source_name: self.sources[source_name].get_table()
|
|
498
556
|
}
|
|
499
557
|
compiled_query_str = self._get_compiled_sql_query_str(compiled_query_str, macros)
|
|
500
558
|
is_duckdb = False
|
|
@@ -515,7 +573,14 @@ class DbviewModel(QueryModel):
|
|
|
515
573
|
kwargs = self._get_compile_sql_model_args(ctx, ctx_args, models_dict)
|
|
516
574
|
self.compiled_query = self._compile_sql_model(kwargs)
|
|
517
575
|
|
|
518
|
-
self.logger.log_activity_time(
|
|
576
|
+
self.logger.log_activity_time(
|
|
577
|
+
f"compiling dbview model '{self.name}'", start,
|
|
578
|
+
additional_data={
|
|
579
|
+
"activity": "compiling data model",
|
|
580
|
+
"model_name": self.name,
|
|
581
|
+
"model_type": self.model_type.value
|
|
582
|
+
}
|
|
583
|
+
)
|
|
519
584
|
|
|
520
585
|
async def _run_sql_model(self, conn: duckdb.DuckDBPyConnection, placeholders: dict = {}) -> None:
|
|
521
586
|
assert self.compiled_query is not None
|
|
@@ -531,7 +596,7 @@ class DbviewModel(QueryModel):
|
|
|
531
596
|
self.logger.info(f"Running dbview '{self.name}' on duckdb")
|
|
532
597
|
return local_conn.sql(query, params=placeholders).pl()
|
|
533
598
|
except duckdb.CatalogException as e:
|
|
534
|
-
raise InvalidInputError(
|
|
599
|
+
raise InvalidInputError(409, f'dependent_data_model_not_found', f'Model "{self.name}" depends on static data models that cannot be found. Try building the Virtual Data Lake (VDL) first.')
|
|
535
600
|
except Exception as e:
|
|
536
601
|
raise RuntimeError(e)
|
|
537
602
|
finally:
|
|
@@ -551,7 +616,14 @@ class DbviewModel(QueryModel):
|
|
|
551
616
|
|
|
552
617
|
await self._run_sql_model(conn, placeholders)
|
|
553
618
|
|
|
554
|
-
self.logger.log_activity_time(
|
|
619
|
+
self.logger.log_activity_time(
|
|
620
|
+
f"running dbview model '{self.name}'", start,
|
|
621
|
+
additional_data={
|
|
622
|
+
"activity": "running data model",
|
|
623
|
+
"model_name": self.name,
|
|
624
|
+
"model_type": self.model_type.value
|
|
625
|
+
}
|
|
626
|
+
)
|
|
555
627
|
|
|
556
628
|
await super().run_model(conn, placeholders)
|
|
557
629
|
|
|
@@ -573,8 +645,16 @@ class FederateModel(QueryModel):
|
|
|
573
645
|
|
|
574
646
|
def ref(dependent_model_name: str) -> str:
|
|
575
647
|
dependent_model = self._ref_for_sql(dependent_model_name, models_dict)
|
|
576
|
-
|
|
577
|
-
|
|
648
|
+
dep = models_dict[dependent_model]
|
|
649
|
+
if isinstance(dep, BuildModel):
|
|
650
|
+
return "vdl." + dependent_model
|
|
651
|
+
if isinstance(dep, SourceModel):
|
|
652
|
+
if dep.model_config.load_to_vdl:
|
|
653
|
+
return "vdl." + dependent_model
|
|
654
|
+
conn_name = dep.model_config.get_connection()
|
|
655
|
+
table_name = dep.model_config.get_table()
|
|
656
|
+
return f"db_{conn_name}.{table_name}"
|
|
657
|
+
return dependent_model
|
|
578
658
|
|
|
579
659
|
kwargs["ref"] = ref
|
|
580
660
|
return kwargs
|
|
@@ -591,12 +671,21 @@ class FederateModel(QueryModel):
|
|
|
591
671
|
dependencies = self.model_config.depends_on
|
|
592
672
|
connections = self.conn_set.get_connections_as_dict()
|
|
593
673
|
|
|
594
|
-
def
|
|
595
|
-
return self._run_sql_query_on_connection(connection_name, sql_query, ctx_args.
|
|
674
|
+
def _run_external_sql(connection_name: str, sql_query: str) -> pl.DataFrame:
|
|
675
|
+
return self._run_sql_query_on_connection(connection_name, sql_query, ctx_args._placeholders)
|
|
676
|
+
|
|
677
|
+
build_model_args = BuildModelArgs(
|
|
678
|
+
**ctx_args._conn_args.__dict__,
|
|
679
|
+
connections=connections, dependencies=dependencies,
|
|
680
|
+
_ref_func=self._ref_for_python, _run_external_sql_func=_run_external_sql
|
|
681
|
+
)
|
|
596
682
|
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
683
|
+
# Instantiate ModelArgs with flattened arguments
|
|
684
|
+
combined_args = {
|
|
685
|
+
**ctx_args.__dict__, **build_model_args.__dict__, "ctx": ctx,
|
|
686
|
+
}
|
|
687
|
+
model_args = ModelArgs(**combined_args)
|
|
688
|
+
return model_args
|
|
600
689
|
|
|
601
690
|
def _compile_python_model(
|
|
602
691
|
self, query_file: mq.PyQueryFile, ctx: dict[str, Any], ctx_args: ContextArgs
|
|
@@ -628,7 +717,14 @@ class FederateModel(QueryModel):
|
|
|
628
717
|
else:
|
|
629
718
|
raise NotImplementedError(f"Query type not supported: {self.query_file.__class__.__name__}")
|
|
630
719
|
|
|
631
|
-
self.logger.log_activity_time(
|
|
720
|
+
self.logger.log_activity_time(
|
|
721
|
+
f"compiling federate model '{self.name}'", start,
|
|
722
|
+
additional_data={
|
|
723
|
+
"activity": "compiling data model",
|
|
724
|
+
"model_name": self.name,
|
|
725
|
+
"model_type": self.model_type.value
|
|
726
|
+
}
|
|
727
|
+
)
|
|
632
728
|
|
|
633
729
|
if not recurse:
|
|
634
730
|
return
|
|
@@ -657,10 +753,13 @@ class FederateModel(QueryModel):
|
|
|
657
753
|
try:
|
|
658
754
|
return local_conn.execute(create_query, existing_placeholders)
|
|
659
755
|
except duckdb.CatalogException as e:
|
|
660
|
-
|
|
756
|
+
if self.name == "__fake_target":
|
|
757
|
+
raise InvalidInputError(409, "invalid_sql_query", f"Provided SQL query depends on static data models that cannot be found. Try building the Virtual Data Lake (VDL) first.")
|
|
758
|
+
else:
|
|
759
|
+
raise InvalidInputError(409, f'dependent_data_model_not_found', f'Model "{self.name}" depends on static data models that cannot be found. Try building the Virtual Data Lake (VDL) first.')
|
|
661
760
|
except Exception as e:
|
|
662
761
|
if self.name == "__fake_target":
|
|
663
|
-
raise InvalidInputError(
|
|
762
|
+
raise InvalidInputError(400, "invalid_sql_query", f"Failed to run provided SQL query")
|
|
664
763
|
else:
|
|
665
764
|
raise FileExecutionError(f'Failed to run federate sql model "{self.name}"', e) from e
|
|
666
765
|
|
|
@@ -687,7 +786,14 @@ class FederateModel(QueryModel):
|
|
|
687
786
|
else:
|
|
688
787
|
raise NotImplementedError(f"Query type not supported: {self.query_file.__class__.__name__}")
|
|
689
788
|
|
|
690
|
-
self.logger.log_activity_time(
|
|
789
|
+
self.logger.log_activity_time(
|
|
790
|
+
f"running federate model '{self.name}'", start,
|
|
791
|
+
additional_data={
|
|
792
|
+
"activity": "running data model",
|
|
793
|
+
"model_name": self.name,
|
|
794
|
+
"model_type": self.model_type.value
|
|
795
|
+
}
|
|
796
|
+
)
|
|
691
797
|
|
|
692
798
|
await super().run_model(conn, placeholders)
|
|
693
799
|
|
|
@@ -717,7 +823,12 @@ class BuildModel(StaticModel, QueryModel):
|
|
|
717
823
|
}
|
|
718
824
|
|
|
719
825
|
def ref_for_build(dependent_model_name: str) -> str:
|
|
720
|
-
dependent_model = self._ref_for_sql(dependent_model_name,
|
|
826
|
+
dependent_model = self._ref_for_sql(dependent_model_name, models_dict)
|
|
827
|
+
dep = models_dict[dependent_model]
|
|
828
|
+
if isinstance(dep, SourceModel) and not dep.model_config.load_to_vdl:
|
|
829
|
+
conn_name = dep.model_config.get_connection()
|
|
830
|
+
table_name = dep.model_config.get_table()
|
|
831
|
+
return f"db_{conn_name}.{table_name}"
|
|
721
832
|
return dependent_model
|
|
722
833
|
|
|
723
834
|
kwargs["ref"] = ref_for_build
|
|
@@ -740,11 +851,13 @@ class BuildModel(StaticModel, QueryModel):
|
|
|
740
851
|
|
|
741
852
|
def _get_compile_python_model_args(self, conn_args: ConnectionsArgs) -> BuildModelArgs:
|
|
742
853
|
|
|
743
|
-
def
|
|
854
|
+
def _run_external_sql(connection_name: str, sql_query: str):
|
|
744
855
|
return self._run_sql_query_on_connection(connection_name, sql_query)
|
|
745
856
|
|
|
746
857
|
return BuildModelArgs(
|
|
747
|
-
conn_args
|
|
858
|
+
**conn_args.__dict__,
|
|
859
|
+
connections=self.conn_set.get_connections_as_dict(), dependencies=self.model_config.depends_on,
|
|
860
|
+
_ref_func=self._ref_for_python, _run_external_sql_func=_run_external_sql
|
|
748
861
|
)
|
|
749
862
|
|
|
750
863
|
def _compile_python_model(
|
|
@@ -770,7 +883,14 @@ class BuildModel(StaticModel, QueryModel):
|
|
|
770
883
|
else:
|
|
771
884
|
raise NotImplementedError(f"Query type not supported: {self.query_file.__class__.__name__}")
|
|
772
885
|
|
|
773
|
-
self.logger.log_activity_time(
|
|
886
|
+
self.logger.log_activity_time(
|
|
887
|
+
f"compiling build model '{self.name}'", start,
|
|
888
|
+
additional_data={
|
|
889
|
+
"activity": "compiling data model",
|
|
890
|
+
"model_name": self.name,
|
|
891
|
+
"model_type": self.model_type.value
|
|
892
|
+
}
|
|
893
|
+
)
|
|
774
894
|
|
|
775
895
|
dependencies = self.model_config.depends_on
|
|
776
896
|
self.wait_count_for_build = len(dependencies)
|
|
@@ -785,14 +905,17 @@ class BuildModel(StaticModel, QueryModel):
|
|
|
785
905
|
def create_table():
|
|
786
906
|
create_query = self.model_config.get_sql_for_build(self.name, query)
|
|
787
907
|
local_conn = conn.cursor()
|
|
908
|
+
# local_conn = conn
|
|
788
909
|
try:
|
|
789
910
|
return u.run_duckdb_stmt(self.logger, local_conn, create_query, model_name=self.name)
|
|
790
911
|
except Exception as e:
|
|
791
912
|
raise FileExecutionError(f'Failed to build static sql model "{self.name}"', e) from e
|
|
792
913
|
finally:
|
|
793
914
|
local_conn.close()
|
|
915
|
+
# pass
|
|
794
916
|
|
|
795
|
-
await asyncio.to_thread(create_table)
|
|
917
|
+
# await asyncio.to_thread(create_table)
|
|
918
|
+
create_table() # without threading
|
|
796
919
|
|
|
797
920
|
async def _build_python_model(self, compiled_query: mq.PyModelQuery, conn: duckdb.DuckDBPyConnection) -> None:
|
|
798
921
|
query_result = await asyncio.to_thread(compiled_query.query)
|
|
@@ -800,7 +923,8 @@ class BuildModel(StaticModel, QueryModel):
|
|
|
800
923
|
query_result = pl.from_pandas(query_result).lazy()
|
|
801
924
|
if self.needs_python_df_for_build:
|
|
802
925
|
self.result = query_result.lazy()
|
|
803
|
-
await asyncio.to_thread(self._create_table_from_df, conn, query_result)
|
|
926
|
+
# await asyncio.to_thread(self._create_table_from_df, conn, query_result)
|
|
927
|
+
self._create_table_from_df(conn, query_result) # without threading
|
|
804
928
|
|
|
805
929
|
async def build_model(self, conn: duckdb.DuckDBPyConnection, full_refresh: bool) -> None:
|
|
806
930
|
start = time.time()
|
|
@@ -813,24 +937,33 @@ class BuildModel(StaticModel, QueryModel):
|
|
|
813
937
|
def load_df(conn: duckdb.DuckDBPyConnection, dep_model: DataModel):
|
|
814
938
|
if dep_model.result is None:
|
|
815
939
|
local_conn = conn.cursor()
|
|
940
|
+
# local_conn = conn
|
|
816
941
|
try:
|
|
817
942
|
dep_model.result = dep_model._load_duckdb_view_to_python_df(local_conn)
|
|
818
943
|
finally:
|
|
819
944
|
local_conn.close()
|
|
945
|
+
# pass
|
|
820
946
|
|
|
821
947
|
coroutines = []
|
|
822
948
|
for dep_model in self.upstreams_for_build.values():
|
|
823
949
|
coro = asyncio.to_thread(load_df, conn, dep_model)
|
|
824
950
|
coroutines.append(coro)
|
|
825
951
|
await u.asyncio_gather(coroutines)
|
|
826
|
-
|
|
952
|
+
|
|
827
953
|
# Then run the model's Python function to build the model
|
|
828
954
|
await self._build_python_model(self.compiled_query, conn)
|
|
829
955
|
else:
|
|
830
956
|
raise NotImplementedError(f"Query type not supported: {self.query_file.__class__.__name__}")
|
|
831
957
|
|
|
832
958
|
print(f"[{u.get_current_time()}] ✅ FINISHED: build model '{self.name}'")
|
|
833
|
-
self.logger.log_activity_time(
|
|
959
|
+
self.logger.log_activity_time(
|
|
960
|
+
f"building static build model '{self.name}' into VDL", start,
|
|
961
|
+
additional_data={
|
|
962
|
+
"activity": "building data model into VDL",
|
|
963
|
+
"model_name": self.name,
|
|
964
|
+
"model_type": self.model_type.value
|
|
965
|
+
}
|
|
966
|
+
)
|
|
834
967
|
|
|
835
968
|
await super().build_model(conn, full_refresh)
|
|
836
969
|
|
|
@@ -840,7 +973,7 @@ class DAG:
|
|
|
840
973
|
dataset: DatasetConfig | None
|
|
841
974
|
target_model: DataModel
|
|
842
975
|
models_dict: dict[str, DataModel]
|
|
843
|
-
|
|
976
|
+
datalake_db_path: str | None = field(default=None)
|
|
844
977
|
logger: u.Logger = field(default_factory=lambda: u.Logger(""))
|
|
845
978
|
parameter_set: ParameterSet | None = field(default=None, init=False) # set in apply_selections
|
|
846
979
|
placeholders: dict[str, Any] = field(init=False, default_factory=dict)
|
|
@@ -849,36 +982,52 @@ class DAG:
|
|
|
849
982
|
return f" for dataset '{self.dataset.name}'" if self.dataset else ""
|
|
850
983
|
|
|
851
984
|
def compile_build_models(self, conn_args: ConnectionsArgs) -> None:
|
|
852
|
-
static_models: dict[str, StaticModel] = {
|
|
985
|
+
static_models: dict[str, StaticModel] = {
|
|
986
|
+
k: v for k, v in self.models_dict.items() if isinstance(v, StaticModel)
|
|
987
|
+
}
|
|
853
988
|
for model in static_models.values():
|
|
854
989
|
if isinstance(model, BuildModel):
|
|
855
990
|
model.compile_for_build(conn_args, static_models)
|
|
856
991
|
|
|
857
992
|
def apply_selections(
|
|
858
|
-
self, param_cfg_set: ParameterConfigsSet, user:
|
|
993
|
+
self, param_cfg_set: ParameterConfigsSet, user: AbstractUser, selections: dict[str, str]
|
|
859
994
|
) -> None:
|
|
860
995
|
start = time.time()
|
|
996
|
+
|
|
861
997
|
dataset_params = self.dataset.parameters if self.dataset else None
|
|
862
998
|
parameter_set = param_cfg_set.apply_selections(dataset_params, selections, user)
|
|
863
999
|
self.parameter_set = parameter_set
|
|
864
1000
|
msg_extension = self._get_msg_extension()
|
|
865
|
-
|
|
1001
|
+
|
|
1002
|
+
dataset_name = self.dataset.name if self.dataset else None
|
|
1003
|
+
self.logger.log_activity_time(
|
|
1004
|
+
"applying selections" + msg_extension, start,
|
|
1005
|
+
additional_data={"activity": "applying selections", "dataset_name": dataset_name}
|
|
1006
|
+
)
|
|
866
1007
|
|
|
867
1008
|
def _compile_context(
|
|
868
|
-
self, param_args: ParametersArgs, context_func: ContextFunc, user:
|
|
1009
|
+
self, param_args: ParametersArgs, context_func: ContextFunc, user: AbstractUser, configurables: dict[str, str]
|
|
869
1010
|
) -> tuple[dict[str, Any], ContextArgs]:
|
|
870
1011
|
start = time.time()
|
|
1012
|
+
|
|
871
1013
|
context = {}
|
|
872
1014
|
assert isinstance(self.parameter_set, ParameterSet)
|
|
873
1015
|
prms = self.parameter_set.get_parameters_as_dict()
|
|
874
|
-
|
|
875
|
-
|
|
1016
|
+
args = ContextArgs(
|
|
1017
|
+
**param_args.__dict__, user=user, prms=prms, configurables=configurables, _conn_args=param_args
|
|
1018
|
+
)
|
|
876
1019
|
msg_extension = self._get_msg_extension()
|
|
1020
|
+
|
|
877
1021
|
try:
|
|
878
1022
|
context_func(context, args)
|
|
879
1023
|
except Exception as e:
|
|
880
1024
|
raise FileExecutionError(f'Failed to run {c.CONTEXT_FILE}' + msg_extension, e) from e
|
|
881
|
-
|
|
1025
|
+
|
|
1026
|
+
dataset_name = self.dataset.name if self.dataset else None
|
|
1027
|
+
self.logger.log_activity_time(
|
|
1028
|
+
"running context.py" + msg_extension, start,
|
|
1029
|
+
additional_data={"activity": "running context.py", "dataset_name": dataset_name}
|
|
1030
|
+
)
|
|
882
1031
|
return context, args
|
|
883
1032
|
|
|
884
1033
|
def _compile_models(self, context: dict[str, Any], ctx_args: ContextArgs, recurse: bool) -> None:
|
|
@@ -889,29 +1038,25 @@ class DAG:
|
|
|
889
1038
|
terminal_nodes = self.target_model.get_terminal_nodes(set())
|
|
890
1039
|
for model in self.models_dict.values():
|
|
891
1040
|
model.confirmed_no_cycles = False
|
|
892
|
-
self.logger.log_activity_time(
|
|
1041
|
+
self.logger.log_activity_time("validating no cycles in model dependencies", start)
|
|
893
1042
|
return terminal_nodes
|
|
894
1043
|
|
|
1044
|
+
def _attach_connections_with_type_duckdb(self, conn: duckdb.DuckDBPyConnection) -> None:
|
|
1045
|
+
for conn_name, connection in self.target_model.conn_set.get_connections_as_dict().items():
|
|
1046
|
+
if not isinstance(connection, ConnectionProperties):
|
|
1047
|
+
continue
|
|
1048
|
+
attach_uri = connection.attach_uri_for_duckdb
|
|
1049
|
+
if attach_uri is None:
|
|
1050
|
+
continue
|
|
1051
|
+
attach_stmt = f"ATTACH IF NOT EXISTS '{attach_uri}' AS db_{conn_name} (READ_ONLY)"
|
|
1052
|
+
u.run_duckdb_stmt(self.logger, conn, attach_stmt, redacted_values=[attach_uri])
|
|
1053
|
+
|
|
895
1054
|
async def _run_models(self) -> None:
|
|
896
1055
|
terminal_nodes = self._get_terminal_nodes()
|
|
897
1056
|
|
|
898
|
-
|
|
1057
|
+
conn = u.create_duckdb_connection(datalake_db_path=self.datalake_db_path)
|
|
899
1058
|
try:
|
|
900
|
-
|
|
901
|
-
conn.close()
|
|
902
|
-
except duckdb.IOException as e:
|
|
903
|
-
# unable to create duckdb venv file means it's in use and already exists
|
|
904
|
-
# do not throw error here since attaching in read-only mode later may still work
|
|
905
|
-
pass
|
|
906
|
-
|
|
907
|
-
conn = u.create_duckdb_connection()
|
|
908
|
-
try:
|
|
909
|
-
read_only = "(READ_ONLY)" if self.duckdb_filepath else ""
|
|
910
|
-
try:
|
|
911
|
-
conn.execute(f"ATTACH '{self.duckdb_filepath}' AS venv {read_only}")
|
|
912
|
-
except duckdb.IOException as e:
|
|
913
|
-
self.logger.warning(f"Unable to attach to duckdb venv file: {self.duckdb_filepath}")
|
|
914
|
-
raise e
|
|
1059
|
+
self._attach_connections_with_type_duckdb(conn)
|
|
915
1060
|
|
|
916
1061
|
coroutines = []
|
|
917
1062
|
for model_name in terminal_nodes:
|
|
@@ -923,18 +1068,18 @@ class DAG:
|
|
|
923
1068
|
conn.close()
|
|
924
1069
|
|
|
925
1070
|
async def execute(
|
|
926
|
-
self, param_args: ParametersArgs, param_cfg_set: ParameterConfigsSet, context_func: ContextFunc, user:
|
|
927
|
-
*, runquery: bool = True, recurse: bool = True,
|
|
1071
|
+
self, param_args: ParametersArgs, param_cfg_set: ParameterConfigsSet, context_func: ContextFunc, user: AbstractUser, selections: dict[str, str],
|
|
1072
|
+
*, runquery: bool = True, recurse: bool = True, configurables: dict[str, str] = {}
|
|
928
1073
|
) -> None:
|
|
929
1074
|
recurse = (recurse or runquery)
|
|
930
1075
|
|
|
931
1076
|
self.apply_selections(param_cfg_set, user, selections)
|
|
932
1077
|
|
|
933
|
-
context, ctx_args = self._compile_context(param_args, context_func, user,
|
|
1078
|
+
context, ctx_args = self._compile_context(param_args, context_func, user, configurables)
|
|
934
1079
|
|
|
935
1080
|
self._compile_models(context, ctx_args, recurse)
|
|
936
1081
|
|
|
937
|
-
self.placeholders = ctx_args.
|
|
1082
|
+
self.placeholders = dict(ctx_args._placeholders)
|
|
938
1083
|
if runquery:
|
|
939
1084
|
await self._run_models()
|
|
940
1085
|
|
|
@@ -945,51 +1090,37 @@ class DAG:
|
|
|
945
1090
|
self.target_model.retrieve_dependent_query_models(all_model_names)
|
|
946
1091
|
return all_model_names
|
|
947
1092
|
|
|
948
|
-
def
|
|
949
|
-
G = nx.DiGraph()
|
|
950
|
-
|
|
951
|
-
for model_name, model in self.models_dict.items():
|
|
952
|
-
level = model.get_max_path_length_to_target()
|
|
953
|
-
if level is not None:
|
|
954
|
-
G.add_node(model_name, layer=-level, model_type=model.model_type)
|
|
955
|
-
|
|
956
|
-
for model_name in G.nodes:
|
|
957
|
-
model = self.models_dict[model_name]
|
|
958
|
-
for dep_model_name in model.downstreams:
|
|
959
|
-
G.add_edge(model_name, dep_model_name)
|
|
960
|
-
|
|
961
|
-
return G
|
|
962
|
-
|
|
963
|
-
def get_all_data_models(self) -> list[arm.DataModelItem]:
|
|
1093
|
+
def get_all_data_models(self) -> list[rm.DataModelItem]:
|
|
964
1094
|
data_models = []
|
|
965
1095
|
for model_name, model in self.models_dict.items():
|
|
966
1096
|
is_queryable = model.is_queryable
|
|
967
|
-
data_model =
|
|
1097
|
+
data_model = rm.DataModelItem(name=model_name, model_type=model.model_type.value, config=model.model_config, is_queryable=is_queryable)
|
|
968
1098
|
data_models.append(data_model)
|
|
969
1099
|
return data_models
|
|
970
1100
|
|
|
971
|
-
def get_all_model_lineage(self) -> list[
|
|
1101
|
+
def get_all_model_lineage(self) -> list[rm.LineageRelation]:
|
|
972
1102
|
model_lineage = []
|
|
973
1103
|
for model_name, model in self.models_dict.items():
|
|
974
1104
|
if not isinstance(model, QueryModel):
|
|
975
1105
|
continue
|
|
976
1106
|
for dep_model_name in model.model_config.depends_on:
|
|
977
1107
|
edge_type = "buildtime" if isinstance(model, BuildModel) else "runtime"
|
|
978
|
-
source_model =
|
|
979
|
-
target_model =
|
|
980
|
-
model_lineage.append(
|
|
1108
|
+
source_model = rm.LineageNode(name=dep_model_name, type="model")
|
|
1109
|
+
target_model = rm.LineageNode(name=model_name, type="model")
|
|
1110
|
+
model_lineage.append(rm.LineageRelation(type=edge_type, source=source_model, target=target_model))
|
|
981
1111
|
return model_lineage
|
|
982
1112
|
|
|
983
1113
|
|
|
984
1114
|
class ModelsIO:
|
|
985
1115
|
|
|
986
1116
|
@classmethod
|
|
987
|
-
def _load_model_config(cls, filepath: Path, model_type: ModelType, env_vars:
|
|
1117
|
+
def _load_model_config(cls, filepath: Path, model_type: ModelType, env_vars: SquirrelsEnvVars) -> mc.ModelConfig:
|
|
988
1118
|
yaml_path = filepath.with_suffix('.yml')
|
|
989
1119
|
config_dict = u.load_yaml_config(yaml_path) if yaml_path.exists() else {}
|
|
990
1120
|
|
|
991
1121
|
if model_type == ModelType.DBVIEW:
|
|
992
|
-
|
|
1122
|
+
default_conn_name = env_vars.connections_default_name_used
|
|
1123
|
+
config = mc.DbviewModelConfig(**config_dict).finalize_connection(default_conn_name=default_conn_name)
|
|
993
1124
|
return config
|
|
994
1125
|
elif model_type == ModelType.FEDERATE:
|
|
995
1126
|
return mc.FederateModelConfig(**config_dict)
|
|
@@ -1000,13 +1131,13 @@ class ModelsIO:
|
|
|
1000
1131
|
|
|
1001
1132
|
@classmethod
|
|
1002
1133
|
def _populate_from_file(
|
|
1003
|
-
cls, raw_queries_by_model: dict[str, mq.QueryFileWithConfig], dp: str, file: str, model_type: ModelType, env_vars:
|
|
1134
|
+
cls, raw_queries_by_model: dict[str, mq.QueryFileWithConfig], dp: str, file: str, model_type: ModelType, env_vars: SquirrelsEnvVars
|
|
1004
1135
|
) -> None:
|
|
1005
1136
|
filepath = Path(dp, file)
|
|
1006
1137
|
file_stem, extension = os.path.splitext(file)
|
|
1007
1138
|
|
|
1008
1139
|
if extension == '.py':
|
|
1009
|
-
module = pm.PyModule(filepath)
|
|
1140
|
+
module = pm.PyModule(filepath, project_path=env_vars.project_path)
|
|
1010
1141
|
raw_query = module.get_func_or_class(c.MAIN_FUNC)
|
|
1011
1142
|
query_file = mq.PyQueryFile(filepath.as_posix(), raw_query)
|
|
1012
1143
|
elif extension == '.sql':
|
|
@@ -1024,7 +1155,7 @@ class ModelsIO:
|
|
|
1024
1155
|
|
|
1025
1156
|
@classmethod
|
|
1026
1157
|
def _populate_raw_queries_for_type(
|
|
1027
|
-
cls, folder_path: Path, model_type: ModelType,
|
|
1158
|
+
cls, folder_path: Path, model_type: ModelType, env_vars: SquirrelsEnvVars
|
|
1028
1159
|
) -> dict[str, mq.QueryFileWithConfig]:
|
|
1029
1160
|
raw_queries_by_model: dict[str, mq.QueryFileWithConfig] = {}
|
|
1030
1161
|
for dp, _, filenames in os.walk(folder_path):
|
|
@@ -1033,35 +1164,37 @@ class ModelsIO:
|
|
|
1033
1164
|
return raw_queries_by_model
|
|
1034
1165
|
|
|
1035
1166
|
@classmethod
|
|
1036
|
-
def load_build_files(cls, logger: u.Logger,
|
|
1167
|
+
def load_build_files(cls, logger: u.Logger, env_vars: SquirrelsEnvVars) -> dict[str, mq.QueryFileWithConfig]:
|
|
1037
1168
|
start = time.time()
|
|
1038
|
-
builds_path = u.Path(
|
|
1039
|
-
raw_queries_by_model = cls._populate_raw_queries_for_type(builds_path, ModelType.BUILD)
|
|
1169
|
+
builds_path = u.Path(env_vars.project_path, c.MODELS_FOLDER, c.BUILDS_FOLDER)
|
|
1170
|
+
raw_queries_by_model = cls._populate_raw_queries_for_type(builds_path, ModelType.BUILD, env_vars=env_vars)
|
|
1040
1171
|
logger.log_activity_time("loading build files", start)
|
|
1041
1172
|
return raw_queries_by_model
|
|
1042
1173
|
|
|
1043
1174
|
@classmethod
|
|
1044
|
-
def load_dbview_files(cls, logger: u.Logger,
|
|
1175
|
+
def load_dbview_files(cls, logger: u.Logger, env_vars: SquirrelsEnvVars) -> dict[str, mq.QueryFileWithConfig]:
|
|
1045
1176
|
start = time.time()
|
|
1046
|
-
dbviews_path = u.Path(
|
|
1177
|
+
dbviews_path = u.Path(env_vars.project_path, c.MODELS_FOLDER, c.DBVIEWS_FOLDER)
|
|
1047
1178
|
raw_queries_by_model = cls._populate_raw_queries_for_type(dbviews_path, ModelType.DBVIEW, env_vars=env_vars)
|
|
1048
1179
|
logger.log_activity_time("loading dbview files", start)
|
|
1049
1180
|
return raw_queries_by_model
|
|
1050
1181
|
|
|
1051
1182
|
@classmethod
|
|
1052
|
-
def load_federate_files(cls, logger: u.Logger,
|
|
1183
|
+
def load_federate_files(cls, logger: u.Logger, env_vars: SquirrelsEnvVars) -> dict[str, mq.QueryFileWithConfig]:
|
|
1053
1184
|
start = time.time()
|
|
1054
|
-
federates_path = u.Path(
|
|
1055
|
-
raw_queries_by_model = cls._populate_raw_queries_for_type(federates_path, ModelType.FEDERATE)
|
|
1185
|
+
federates_path = u.Path(env_vars.project_path, c.MODELS_FOLDER, c.FEDERATES_FOLDER)
|
|
1186
|
+
raw_queries_by_model = cls._populate_raw_queries_for_type(federates_path, ModelType.FEDERATE, env_vars=env_vars)
|
|
1056
1187
|
logger.log_activity_time("loading federate files", start)
|
|
1057
1188
|
return raw_queries_by_model
|
|
1058
1189
|
|
|
1059
1190
|
@classmethod
|
|
1060
|
-
def load_context_func(cls, logger: u.Logger,
|
|
1191
|
+
def load_context_func(cls, logger: u.Logger, project_path: str) -> ContextFunc:
|
|
1061
1192
|
start = time.time()
|
|
1062
1193
|
|
|
1063
|
-
context_path = u.Path(
|
|
1064
|
-
context_func: ContextFunc = pm.PyModule(
|
|
1194
|
+
context_path = u.Path(project_path, c.PYCONFIGS_FOLDER, c.CONTEXT_FILE)
|
|
1195
|
+
context_func: ContextFunc = pm.PyModule(
|
|
1196
|
+
context_path, project_path=project_path
|
|
1197
|
+
).get_func_or_class(c.MAIN_FUNC, default_attr=lambda ctx, sqrl: None)
|
|
1065
1198
|
|
|
1066
1199
|
logger.log_activity_time("loading file for context.py", start)
|
|
1067
1200
|
return context_func
|