squirrels 0.5.0rc0__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of squirrels might be problematic. Click here for more details.
- dateutils/__init__.py +6 -0
- dateutils/_enums.py +25 -0
- squirrels/dateutils.py → dateutils/_implementation.py +58 -111
- dateutils/types.py +6 -0
- squirrels/__init__.py +10 -12
- squirrels/_api_routes/__init__.py +5 -0
- squirrels/_api_routes/auth.py +271 -0
- squirrels/_api_routes/base.py +171 -0
- squirrels/_api_routes/dashboards.py +158 -0
- squirrels/_api_routes/data_management.py +148 -0
- squirrels/_api_routes/datasets.py +265 -0
- squirrels/_api_routes/oauth2.py +298 -0
- squirrels/_api_routes/project.py +252 -0
- squirrels/_api_server.py +245 -781
- squirrels/_arguments/__init__.py +0 -0
- squirrels/{arguments → _arguments}/init_time_args.py +7 -2
- squirrels/{arguments → _arguments}/run_time_args.py +13 -35
- squirrels/_auth.py +720 -212
- squirrels/_command_line.py +81 -41
- squirrels/_compile_prompts.py +147 -0
- squirrels/_connection_set.py +16 -7
- squirrels/_constants.py +29 -9
- squirrels/{_dashboards_io.py → _dashboards.py} +87 -6
- squirrels/_data_sources.py +570 -0
- squirrels/{dataset_result.py → _dataset_types.py} +2 -4
- squirrels/_exceptions.py +9 -37
- squirrels/_initializer.py +83 -59
- squirrels/_logging.py +117 -0
- squirrels/_manifest.py +129 -62
- squirrels/_model_builder.py +10 -52
- squirrels/_model_configs.py +3 -3
- squirrels/_model_queries.py +1 -1
- squirrels/_models.py +249 -118
- squirrels/{package_data → _package_data}/base_project/.env +16 -4
- squirrels/{package_data → _package_data}/base_project/.env.example +15 -3
- squirrels/{package_data → _package_data}/base_project/connections.yml +4 -3
- squirrels/{package_data → _package_data}/base_project/dashboards/dashboard_example.py +4 -4
- squirrels/_package_data/base_project/dashboards/dashboard_example.yml +22 -0
- squirrels/{package_data → _package_data}/base_project/duckdb_init.sql +1 -0
- squirrels/_package_data/base_project/macros/macros_example.sql +17 -0
- squirrels/{package_data → _package_data}/base_project/models/builds/build_example.py +2 -2
- squirrels/{package_data → _package_data}/base_project/models/builds/build_example.sql +1 -1
- squirrels/{package_data → _package_data}/base_project/models/builds/build_example.yml +2 -0
- squirrels/_package_data/base_project/models/dbviews/dbview_example.sql +17 -0
- squirrels/_package_data/base_project/models/dbviews/dbview_example.yml +32 -0
- squirrels/_package_data/base_project/models/federates/federate_example.py +48 -0
- squirrels/_package_data/base_project/models/federates/federate_example.sql +21 -0
- squirrels/{package_data → _package_data}/base_project/models/federates/federate_example.yml +7 -7
- squirrels/{package_data → _package_data}/base_project/models/sources.yml +5 -6
- squirrels/{package_data → _package_data}/base_project/parameters.yml +32 -45
- squirrels/_package_data/base_project/pyconfigs/connections.py +18 -0
- squirrels/{package_data → _package_data}/base_project/pyconfigs/context.py +31 -22
- squirrels/_package_data/base_project/pyconfigs/parameters.py +141 -0
- squirrels/_package_data/base_project/pyconfigs/user.py +44 -0
- squirrels/{package_data → _package_data}/base_project/seeds/seed_categories.yml +1 -1
- squirrels/{package_data → _package_data}/base_project/seeds/seed_subcategories.yml +1 -1
- squirrels/_package_data/base_project/squirrels.yml.j2 +61 -0
- squirrels/_package_data/templates/dataset_results.html +112 -0
- squirrels/_package_data/templates/oauth_login.html +271 -0
- squirrels/_package_data/templates/squirrels_studio.html +20 -0
- squirrels/_parameter_configs.py +76 -55
- squirrels/_parameter_options.py +348 -0
- squirrels/_parameter_sets.py +53 -45
- squirrels/_parameters.py +1664 -0
- squirrels/_project.py +403 -242
- squirrels/_py_module.py +3 -2
- squirrels/_request_context.py +33 -0
- squirrels/_schemas/__init__.py +0 -0
- squirrels/_schemas/auth_models.py +167 -0
- squirrels/_schemas/query_param_models.py +75 -0
- squirrels/{_api_response_models.py → _schemas/response_models.py} +48 -18
- squirrels/_seeds.py +1 -1
- squirrels/_sources.py +23 -19
- squirrels/_utils.py +121 -39
- squirrels/_version.py +1 -1
- squirrels/arguments.py +7 -0
- squirrels/auth.py +4 -0
- squirrels/connections.py +3 -0
- squirrels/dashboards.py +2 -81
- squirrels/data_sources.py +14 -563
- squirrels/parameter_options.py +13 -348
- squirrels/parameters.py +14 -1266
- squirrels/types.py +16 -0
- {squirrels-0.5.0rc0.dist-info → squirrels-0.5.1.dist-info}/METADATA +42 -30
- squirrels-0.5.1.dist-info/RECORD +98 -0
- squirrels/package_data/base_project/dashboards/dashboard_example.yml +0 -22
- squirrels/package_data/base_project/macros/macros_example.sql +0 -15
- squirrels/package_data/base_project/models/dbviews/dbview_example.sql +0 -12
- squirrels/package_data/base_project/models/dbviews/dbview_example.yml +0 -26
- squirrels/package_data/base_project/models/federates/federate_example.py +0 -44
- squirrels/package_data/base_project/models/federates/federate_example.sql +0 -17
- squirrels/package_data/base_project/pyconfigs/connections.py +0 -14
- squirrels/package_data/base_project/pyconfigs/parameters.py +0 -93
- squirrels/package_data/base_project/pyconfigs/user.py +0 -23
- squirrels/package_data/base_project/squirrels.yml.j2 +0 -71
- squirrels-0.5.0rc0.dist-info/RECORD +0 -70
- /squirrels/{package_data → _package_data}/base_project/assets/expenses.db +0 -0
- /squirrels/{package_data → _package_data}/base_project/assets/weather.db +0 -0
- /squirrels/{package_data → _package_data}/base_project/docker/.dockerignore +0 -0
- /squirrels/{package_data → _package_data}/base_project/docker/Dockerfile +0 -0
- /squirrels/{package_data → _package_data}/base_project/docker/compose.yml +0 -0
- /squirrels/{package_data/base_project/.gitignore → _package_data/base_project/gitignore} +0 -0
- /squirrels/{package_data → _package_data}/base_project/seeds/seed_categories.csv +0 -0
- /squirrels/{package_data → _package_data}/base_project/seeds/seed_subcategories.csv +0 -0
- /squirrels/{package_data → _package_data}/base_project/tmp/.gitignore +0 -0
- {squirrels-0.5.0rc0.dist-info → squirrels-0.5.1.dist-info}/WHEEL +0 -0
- {squirrels-0.5.0rc0.dist-info → squirrels-0.5.1.dist-info}/entry_points.txt +0 -0
- {squirrels-0.5.0rc0.dist-info → squirrels-0.5.1.dist-info}/licenses/LICENSE +0 -0
squirrels/_models.py
CHANGED
|
@@ -5,25 +5,26 @@ from abc import ABCMeta, abstractmethod
|
|
|
5
5
|
from enum import Enum
|
|
6
6
|
from pathlib import Path
|
|
7
7
|
import asyncio, os, re, time, duckdb, sqlglot
|
|
8
|
-
import polars as pl, pandas as pd
|
|
8
|
+
import polars as pl, pandas as pd
|
|
9
9
|
|
|
10
|
-
from . import _constants as c, _utils as u, _py_module as pm, _model_queries as mq, _model_configs as mc, _sources as src
|
|
10
|
+
from . import _constants as c, _utils as u, _py_module as pm, _model_queries as mq, _model_configs as mc, _sources as src
|
|
11
|
+
from ._schemas import response_models as rm
|
|
11
12
|
from ._exceptions import FileExecutionError, InvalidInputError
|
|
12
|
-
from .
|
|
13
|
-
from ._auth import
|
|
13
|
+
from ._arguments.run_time_args import ContextArgs, ModelArgs, BuildModelArgs
|
|
14
|
+
from ._auth import AbstractUser
|
|
14
15
|
from ._connection_set import ConnectionsArgs, ConnectionSet, ConnectionProperties
|
|
15
|
-
from ._manifest import DatasetConfig
|
|
16
|
+
from ._manifest import DatasetConfig, ConnectionTypeEnum
|
|
16
17
|
from ._parameter_sets import ParameterConfigsSet, ParametersArgs, ParameterSet
|
|
17
18
|
|
|
18
19
|
ContextFunc = Callable[[dict[str, Any], ContextArgs], None]
|
|
19
20
|
|
|
20
21
|
|
|
21
22
|
class ModelType(Enum):
|
|
23
|
+
SEED = "seed"
|
|
22
24
|
SOURCE = "source"
|
|
25
|
+
BUILD = "build"
|
|
23
26
|
DBVIEW = "dbview"
|
|
24
27
|
FEDERATE = "federate"
|
|
25
|
-
SEED = "seed"
|
|
26
|
-
BUILD = "build"
|
|
27
28
|
|
|
28
29
|
|
|
29
30
|
@dataclass
|
|
@@ -78,15 +79,15 @@ class DataModel(metaclass=ABCMeta):
|
|
|
78
79
|
self.confirmed_no_cycles = True
|
|
79
80
|
return terminal_nodes
|
|
80
81
|
|
|
81
|
-
def _load_duckdb_view_to_python_df(self, conn: duckdb.DuckDBPyConnection, *,
|
|
82
|
-
table_name = ("
|
|
82
|
+
def _load_duckdb_view_to_python_df(self, conn: duckdb.DuckDBPyConnection, *, use_datalake: bool = False) -> pl.LazyFrame:
|
|
83
|
+
table_name = ("vdl." if use_datalake else "") + self.name
|
|
83
84
|
try:
|
|
84
85
|
return conn.sql(f"FROM {table_name}").pl().lazy()
|
|
85
86
|
except duckdb.CatalogException as e:
|
|
86
87
|
raise u.ConfigurationError(f'Failed to load duckdb table or view "{self.name}" to python dataframe') from e
|
|
87
88
|
|
|
88
89
|
def _run_sql_query_on_connection(self, connection_name: str, query: str, placeholders: dict = {}) -> pl.DataFrame:
|
|
89
|
-
self.logger.
|
|
90
|
+
self.logger.debug(f"Running SQL query on connection '{connection_name}':\n{query}")
|
|
90
91
|
return self.conn_set.run_sql_query_from_conn_name(query, connection_name, placeholders)
|
|
91
92
|
|
|
92
93
|
async def _trigger(self, conn: duckdb.DuckDBPyConnection, placeholders: dict = {}) -> None:
|
|
@@ -132,11 +133,13 @@ class DataModel(metaclass=ABCMeta):
|
|
|
132
133
|
|
|
133
134
|
def _create_table_from_df(self, conn: duckdb.DuckDBPyConnection, query_result: pl.LazyFrame | pd.DataFrame):
|
|
134
135
|
local_conn = conn.cursor()
|
|
136
|
+
# local_conn = conn
|
|
135
137
|
try:
|
|
136
|
-
|
|
137
|
-
local_conn.execute(f"CREATE OR REPLACE TABLE {self.name} AS
|
|
138
|
+
assert query_result is not None
|
|
139
|
+
local_conn.execute(f"CREATE OR REPLACE TABLE {self.name} AS FROM query_result")
|
|
138
140
|
finally:
|
|
139
141
|
local_conn.close()
|
|
142
|
+
# pass
|
|
140
143
|
|
|
141
144
|
def process_pass_through_columns(self, models_dict: dict[str, DataModel]) -> None:
|
|
142
145
|
pass
|
|
@@ -171,19 +174,26 @@ class StaticModel(DataModel):
|
|
|
171
174
|
def _get_result(self, conn: duckdb.DuckDBPyConnection) -> pl.LazyFrame:
|
|
172
175
|
local_conn = conn.cursor()
|
|
173
176
|
try:
|
|
174
|
-
return self._load_duckdb_view_to_python_df(local_conn,
|
|
177
|
+
return self._load_duckdb_view_to_python_df(local_conn, use_datalake=True)
|
|
175
178
|
except Exception as e:
|
|
176
|
-
raise InvalidInputError(
|
|
179
|
+
raise InvalidInputError(409, f'dependent_data_model_not_found', f'Model "{self.name}" depends on static data models that cannot be found. Try building the Virtual Data Lake (VDL) first.')
|
|
177
180
|
finally:
|
|
178
181
|
local_conn.close()
|
|
179
182
|
|
|
180
183
|
async def run_model(self, conn: duckdb.DuckDBPyConnection, placeholders: dict = {}) -> None:
|
|
181
|
-
start = time.time()
|
|
182
|
-
|
|
183
184
|
if (self.needs_python_df or self.is_target) and self.result is None:
|
|
185
|
+
start = time.time()
|
|
186
|
+
|
|
184
187
|
self.result = await asyncio.to_thread(self._get_result, conn)
|
|
185
|
-
|
|
186
|
-
|
|
188
|
+
|
|
189
|
+
self.logger.log_activity_time(
|
|
190
|
+
f"loading {self.model_type.value} model '{self.name}' into memory", start,
|
|
191
|
+
additional_data={
|
|
192
|
+
"activity": "loading static data model into memory",
|
|
193
|
+
"model_name": self.name,
|
|
194
|
+
"model_type": self.model_type.value
|
|
195
|
+
}
|
|
196
|
+
)
|
|
187
197
|
|
|
188
198
|
await super().run_model(conn, placeholders)
|
|
189
199
|
|
|
@@ -224,10 +234,18 @@ class Seed(StaticModel):
|
|
|
224
234
|
start = time.time()
|
|
225
235
|
|
|
226
236
|
print(f"[{u.get_current_time()}] 🔨 BUILDING: seed model '{self.name}'")
|
|
227
|
-
await asyncio.to_thread(self._create_table_from_df, conn, self.result)
|
|
237
|
+
# await asyncio.to_thread(self._create_table_from_df, conn, self.result)
|
|
238
|
+
self._create_table_from_df(conn, self.result) # without threading
|
|
228
239
|
|
|
229
240
|
print(f"[{u.get_current_time()}] ✅ FINISHED: seed model '{self.name}'")
|
|
230
|
-
self.logger.log_activity_time(
|
|
241
|
+
self.logger.log_activity_time(
|
|
242
|
+
f"building seed model '{self.name}' into VDL", start,
|
|
243
|
+
additional_data={
|
|
244
|
+
"activity": "building data model into VDL",
|
|
245
|
+
"model_name": self.name,
|
|
246
|
+
"model_type": self.model_type.value
|
|
247
|
+
}
|
|
248
|
+
)
|
|
231
249
|
|
|
232
250
|
await super().build_model(conn, full_refresh)
|
|
233
251
|
|
|
@@ -239,22 +257,34 @@ class SourceModel(StaticModel):
|
|
|
239
257
|
@property
|
|
240
258
|
def model_type(self) -> ModelType:
|
|
241
259
|
return ModelType.SOURCE
|
|
260
|
+
|
|
261
|
+
@property
|
|
262
|
+
def connection_props(self) -> ConnectionProperties:
|
|
263
|
+
conn_name = self.model_config.get_connection()
|
|
264
|
+
conn_props = self.conn_set.get_connection(conn_name)
|
|
265
|
+
if isinstance(conn_props, ConnectionProperties):
|
|
266
|
+
return conn_props
|
|
267
|
+
raise u.ConfigurationError(f'Unable to use connection "{conn_name}" for source "{self.name}". Connection "{conn_name}" must be a ConnectionProperties object')
|
|
242
268
|
|
|
243
269
|
@property
|
|
244
270
|
def is_queryable(self) -> bool:
|
|
245
|
-
|
|
271
|
+
connection_props = self.connection_props
|
|
272
|
+
return self.model_config.load_to_vdl or connection_props.type == ConnectionTypeEnum.DUCKDB
|
|
246
273
|
|
|
247
274
|
def _build_source_model(self, conn: duckdb.DuckDBPyConnection, full_refresh: bool) -> None:
|
|
248
275
|
local_conn = conn.cursor()
|
|
276
|
+
# local_conn = conn
|
|
277
|
+
|
|
278
|
+
local_conn.begin()
|
|
249
279
|
try:
|
|
250
280
|
source = self.model_config
|
|
251
281
|
conn_name = source.get_connection()
|
|
252
282
|
|
|
253
|
-
connection_props = self.
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
raise u.ConfigurationError(f'
|
|
283
|
+
connection_props = self.connection_props
|
|
284
|
+
dialect = connection_props.dialect
|
|
285
|
+
attach_uri = connection_props.attach_uri_for_duckdb
|
|
286
|
+
if attach_uri is None:
|
|
287
|
+
raise u.ConfigurationError(f'Loading to duckdb is not supported for source "{self.name}" since its connection "{conn_name}" uses an unsupported dialect')
|
|
258
288
|
|
|
259
289
|
result = u.run_duckdb_stmt(self.logger, local_conn, f"FROM (SHOW DATABASES) WHERE database_name = 'db_{conn_name}'").fetchone()
|
|
260
290
|
if result is None:
|
|
@@ -264,8 +294,9 @@ class SourceModel(StaticModel):
|
|
|
264
294
|
new_table_name = self.name
|
|
265
295
|
|
|
266
296
|
if len(source.columns) == 0:
|
|
267
|
-
stmt = f"CREATE OR REPLACE TABLE {new_table_name} AS
|
|
297
|
+
stmt = f"CREATE OR REPLACE TABLE {new_table_name} AS FROM db_{conn_name}.{table_name}"
|
|
268
298
|
u.run_duckdb_stmt(self.logger, local_conn, stmt)
|
|
299
|
+
local_conn.commit()
|
|
269
300
|
return
|
|
270
301
|
|
|
271
302
|
increasing_column = source.update_hints.increasing_column
|
|
@@ -292,25 +323,44 @@ class SourceModel(StaticModel):
|
|
|
292
323
|
if max_val_of_incr_col is None:
|
|
293
324
|
recreate_table = True
|
|
294
325
|
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
326
|
+
query = source.get_query_for_upsert(dialect, conn_name, table_name, max_val_of_incr_col, full_refresh=recreate_table)
|
|
327
|
+
|
|
328
|
+
primary_keys = ", ".join(source.primary_key) if source.primary_key else ""
|
|
329
|
+
match_condition = f"USING ({primary_keys})" if primary_keys else "ON false"
|
|
330
|
+
stmt = (
|
|
331
|
+
f"MERGE INTO {new_table_name} "
|
|
332
|
+
f"USING ({query}) AS src "
|
|
333
|
+
f"{match_condition} "
|
|
334
|
+
f"WHEN MATCHED THEN UPDATE "
|
|
335
|
+
f"WHEN NOT MATCHED THEN INSERT BY NAME"
|
|
336
|
+
)
|
|
299
337
|
u.run_duckdb_stmt(self.logger, local_conn, stmt)
|
|
338
|
+
|
|
339
|
+
local_conn.commit()
|
|
340
|
+
|
|
300
341
|
finally:
|
|
301
342
|
local_conn.close()
|
|
343
|
+
# pass
|
|
302
344
|
|
|
303
345
|
async def build_model(self, conn: duckdb.DuckDBPyConnection, full_refresh: bool) -> None:
|
|
304
|
-
if self.model_config.
|
|
346
|
+
if self.model_config.load_to_vdl:
|
|
305
347
|
start = time.time()
|
|
306
348
|
print(f"[{u.get_current_time()}] 🔨 BUILDING: source model '{self.name}'")
|
|
307
349
|
|
|
308
|
-
await asyncio.to_thread(self._build_source_model, conn, full_refresh)
|
|
350
|
+
# await asyncio.to_thread(self._build_source_model, conn, full_refresh)
|
|
351
|
+
self._build_source_model(conn, full_refresh) # without threading
|
|
309
352
|
|
|
310
353
|
print(f"[{u.get_current_time()}] ✅ FINISHED: source model '{self.name}'")
|
|
311
|
-
self.logger.log_activity_time(
|
|
354
|
+
self.logger.log_activity_time(
|
|
355
|
+
f"building source model '{self.name}' into VDL", start,
|
|
356
|
+
additional_data={
|
|
357
|
+
"activity": "building data model into VDL",
|
|
358
|
+
"model_name": self.name,
|
|
359
|
+
"model_type": self.model_type.value
|
|
360
|
+
}
|
|
361
|
+
)
|
|
312
362
|
|
|
313
|
-
|
|
363
|
+
await super().build_model(conn, full_refresh)
|
|
314
364
|
|
|
315
365
|
|
|
316
366
|
@dataclass
|
|
@@ -333,10 +383,16 @@ class QueryModel(DataModel):
|
|
|
333
383
|
raise u.ConfigurationError(f'Model "{self.name}" references unknown model "{dependent_model_name}"')
|
|
334
384
|
|
|
335
385
|
dep_model = models_dict[dependent_model_name]
|
|
336
|
-
if isinstance(dep_model, SourceModel) and not dep_model.model_config.
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
)
|
|
386
|
+
if isinstance(dep_model, SourceModel) and not dep_model.model_config.load_to_vdl:
|
|
387
|
+
# Allow when caller is Build or Federate AND the source connection is duckdb; else error
|
|
388
|
+
conn_name = dep_model.model_config.get_connection()
|
|
389
|
+
conn_props = self.conn_set.get_connection(conn_name)
|
|
390
|
+
is_duckdb_conn = isinstance(conn_props, ConnectionProperties) and conn_props.type == ConnectionTypeEnum.DUCKDB
|
|
391
|
+
if not is_duckdb_conn:
|
|
392
|
+
raise u.ConfigurationError(
|
|
393
|
+
f'Model "{self.name}" cannot reference source model "{dependent_model_name}". '
|
|
394
|
+
'To be referenced by a build or federate model, the source must have load_to_vdl=True or a duckdb connection type.'
|
|
395
|
+
)
|
|
340
396
|
|
|
341
397
|
self.model_config.depends_on.add(dependent_model_name)
|
|
342
398
|
return dependent_model_name
|
|
@@ -351,10 +407,10 @@ class QueryModel(DataModel):
|
|
|
351
407
|
def _get_compile_sql_model_args_from_ctx_args(
|
|
352
408
|
self, ctx: dict[str, Any], ctx_args: ContextArgs
|
|
353
409
|
) -> dict[str, Any]:
|
|
354
|
-
is_placeholder = lambda placeholder: placeholder in ctx_args.
|
|
410
|
+
is_placeholder = lambda placeholder: placeholder in ctx_args._placeholders_copy
|
|
355
411
|
kwargs = {
|
|
356
412
|
"proj_vars": ctx_args.proj_vars, "env_vars": ctx_args.env_vars, "user": ctx_args.user, "prms": ctx_args.prms,
|
|
357
|
-
"
|
|
413
|
+
"configurables": ctx_args.configurables, "ctx": ctx, "is_placeholder": is_placeholder, "set_placeholder": ctx_args.set_placeholder,
|
|
358
414
|
"param_exists": ctx_args.param_exists
|
|
359
415
|
}
|
|
360
416
|
return kwargs
|
|
@@ -424,6 +480,11 @@ class QueryModel(DataModel):
|
|
|
424
480
|
dependent_model_names.add(self.name)
|
|
425
481
|
for dep_model in self.upstreams.values():
|
|
426
482
|
dep_model.retrieve_dependent_query_models(dependent_model_names)
|
|
483
|
+
|
|
484
|
+
def _log_sql_to_run(self, sql: str, placeholders: dict[str, Any]) -> None:
|
|
485
|
+
log_msg = f"SQL to run for model '{self.name}':\n{sql}"
|
|
486
|
+
log_msg += f"\n\n(with placeholders: {placeholders})"
|
|
487
|
+
self.logger.debug(log_msg)
|
|
427
488
|
|
|
428
489
|
|
|
429
490
|
@dataclass
|
|
@@ -448,11 +509,11 @@ class DbviewModel(QueryModel):
|
|
|
448
509
|
if source_model.model_config.get_connection() != self.model_config.get_connection():
|
|
449
510
|
raise u.ConfigurationError(f'Dbview "{self.name}" references source "{source_name}" with different connection')
|
|
450
511
|
|
|
451
|
-
# Check if the source model has
|
|
452
|
-
if not source_model.model_config.
|
|
512
|
+
# Check if the source model has load_to_vdl=False but this dbview has translate_to_duckdb=True
|
|
513
|
+
if not source_model.model_config.load_to_vdl and self.model_config.translate_to_duckdb:
|
|
453
514
|
raise u.ConfigurationError(
|
|
454
515
|
f'Dbview "{self.name}" with translate_to_duckdb=True cannot reference source "{source_name}" '
|
|
455
|
-
f'which has
|
|
516
|
+
f'which has load_to_vdl=False'
|
|
456
517
|
)
|
|
457
518
|
|
|
458
519
|
self.model_config.depends_on.add(source_name)
|
|
@@ -460,14 +521,16 @@ class DbviewModel(QueryModel):
|
|
|
460
521
|
return "{{ source(\"" + source_name + "\") }}"
|
|
461
522
|
|
|
462
523
|
kwargs["source"] = source
|
|
524
|
+
kwargs["ref"] = source
|
|
463
525
|
return kwargs
|
|
464
526
|
|
|
465
527
|
def _get_duckdb_query(self, read_dialect: str, query: str) -> str:
|
|
466
528
|
kwargs = {
|
|
467
|
-
"source": lambda source_name: "
|
|
529
|
+
"source": lambda source_name: "vdl." + source_name
|
|
468
530
|
}
|
|
469
531
|
compiled_query = self._get_compiled_sql_query_str(query, kwargs)
|
|
470
|
-
|
|
532
|
+
duckdb_query = sqlglot.transpile(compiled_query, read=read_dialect, write="duckdb", pretty=True)[0]
|
|
533
|
+
return "-- translated to duckdb\n" + duckdb_query
|
|
471
534
|
|
|
472
535
|
def _compile_sql_model(self, kwargs: dict[str, Any]) -> mq.SqlModelQuery:
|
|
473
536
|
compiled_query_str = self._get_compiled_sql_query_str(self.query_file.raw_query, kwargs)
|
|
@@ -476,8 +539,13 @@ class DbviewModel(QueryModel):
|
|
|
476
539
|
connection_props = self.conn_set.get_connection(connection_name)
|
|
477
540
|
|
|
478
541
|
if self.model_config.translate_to_duckdb and isinstance(connection_props, ConnectionProperties):
|
|
542
|
+
# Forbid translate_to_duckdb when dbview connection is duckdb
|
|
543
|
+
if connection_props.type == ConnectionTypeEnum.DUCKDB:
|
|
544
|
+
raise u.ConfigurationError(
|
|
545
|
+
f'Dbview "{self.name}" has translate_to_duckdb=True but its connection is duckdb. Use a federate model instead.'
|
|
546
|
+
)
|
|
479
547
|
macros = {
|
|
480
|
-
"source": lambda source_name: "
|
|
548
|
+
"source": lambda source_name: "vdl." + source_name
|
|
481
549
|
}
|
|
482
550
|
compiled_query2 = self._get_compiled_sql_query_str(compiled_query_str, macros)
|
|
483
551
|
compiled_query_str = self._get_duckdb_query(connection_props.dialect, compiled_query2)
|
|
@@ -505,7 +573,14 @@ class DbviewModel(QueryModel):
|
|
|
505
573
|
kwargs = self._get_compile_sql_model_args(ctx, ctx_args, models_dict)
|
|
506
574
|
self.compiled_query = self._compile_sql_model(kwargs)
|
|
507
575
|
|
|
508
|
-
self.logger.log_activity_time(
|
|
576
|
+
self.logger.log_activity_time(
|
|
577
|
+
f"compiling dbview model '{self.name}'", start,
|
|
578
|
+
additional_data={
|
|
579
|
+
"activity": "compiling data model",
|
|
580
|
+
"model_name": self.name,
|
|
581
|
+
"model_type": self.model_type.value
|
|
582
|
+
}
|
|
583
|
+
)
|
|
509
584
|
|
|
510
585
|
async def _run_sql_model(self, conn: duckdb.DuckDBPyConnection, placeholders: dict = {}) -> None:
|
|
511
586
|
assert self.compiled_query is not None
|
|
@@ -518,19 +593,21 @@ class DbviewModel(QueryModel):
|
|
|
518
593
|
if is_duckdb:
|
|
519
594
|
local_conn = conn.cursor()
|
|
520
595
|
try:
|
|
521
|
-
self.logger.info(f"Running
|
|
596
|
+
self.logger.info(f"Running dbview '{self.name}' on duckdb")
|
|
522
597
|
return local_conn.sql(query, params=placeholders).pl()
|
|
523
598
|
except duckdb.CatalogException as e:
|
|
524
|
-
raise InvalidInputError(
|
|
599
|
+
raise InvalidInputError(409, f'dependent_data_model_not_found', f'Model "{self.name}" depends on static data models that cannot be found. Try building the Virtual Data Lake (VDL) first.')
|
|
525
600
|
except Exception as e:
|
|
526
601
|
raise RuntimeError(e)
|
|
527
602
|
finally:
|
|
528
603
|
local_conn.close()
|
|
529
604
|
else:
|
|
530
|
-
|
|
605
|
+
self.logger.info(f"Running dbview '{self.name}' on connection: {connection_name}")
|
|
606
|
+
return self.conn_set.run_sql_query_from_conn_name(query, connection_name, placeholders)
|
|
531
607
|
except RuntimeError as e:
|
|
532
608
|
raise FileExecutionError(f'Failed to run dbview sql model "{self.name}"', e)
|
|
533
609
|
|
|
610
|
+
self._log_sql_to_run(query, placeholders)
|
|
534
611
|
result = await asyncio.to_thread(run_sql_query_on_connection, is_duckdb, query, placeholders)
|
|
535
612
|
self.result = result.lazy()
|
|
536
613
|
|
|
@@ -539,7 +616,14 @@ class DbviewModel(QueryModel):
|
|
|
539
616
|
|
|
540
617
|
await self._run_sql_model(conn, placeholders)
|
|
541
618
|
|
|
542
|
-
self.logger.log_activity_time(
|
|
619
|
+
self.logger.log_activity_time(
|
|
620
|
+
f"running dbview model '{self.name}'", start,
|
|
621
|
+
additional_data={
|
|
622
|
+
"activity": "running data model",
|
|
623
|
+
"model_name": self.name,
|
|
624
|
+
"model_type": self.model_type.value
|
|
625
|
+
}
|
|
626
|
+
)
|
|
543
627
|
|
|
544
628
|
await super().run_model(conn, placeholders)
|
|
545
629
|
|
|
@@ -561,8 +645,16 @@ class FederateModel(QueryModel):
|
|
|
561
645
|
|
|
562
646
|
def ref(dependent_model_name: str) -> str:
|
|
563
647
|
dependent_model = self._ref_for_sql(dependent_model_name, models_dict)
|
|
564
|
-
|
|
565
|
-
|
|
648
|
+
dep = models_dict[dependent_model]
|
|
649
|
+
if isinstance(dep, BuildModel):
|
|
650
|
+
return "vdl." + dependent_model
|
|
651
|
+
if isinstance(dep, SourceModel):
|
|
652
|
+
if dep.model_config.load_to_vdl:
|
|
653
|
+
return "vdl." + dependent_model
|
|
654
|
+
conn_name = dep.model_config.get_connection()
|
|
655
|
+
table_name = dep.model_config.get_table()
|
|
656
|
+
return f"db_{conn_name}.{table_name}"
|
|
657
|
+
return dependent_model
|
|
566
658
|
|
|
567
659
|
kwargs["ref"] = ref
|
|
568
660
|
return kwargs
|
|
@@ -580,7 +672,7 @@ class FederateModel(QueryModel):
|
|
|
580
672
|
connections = self.conn_set.get_connections_as_dict()
|
|
581
673
|
|
|
582
674
|
def run_external_sql(connection_name: str, sql_query: str) -> pl.DataFrame:
|
|
583
|
-
return self._run_sql_query_on_connection(connection_name, sql_query, ctx_args.
|
|
675
|
+
return self._run_sql_query_on_connection(connection_name, sql_query, ctx_args._placeholders_copy)
|
|
584
676
|
|
|
585
677
|
conn_args = ConnectionsArgs(ctx_args.project_path, ctx_args.proj_vars, ctx_args.env_vars)
|
|
586
678
|
build_model_args = BuildModelArgs(conn_args, connections, dependencies, self._ref_for_python, run_external_sql)
|
|
@@ -616,7 +708,14 @@ class FederateModel(QueryModel):
|
|
|
616
708
|
else:
|
|
617
709
|
raise NotImplementedError(f"Query type not supported: {self.query_file.__class__.__name__}")
|
|
618
710
|
|
|
619
|
-
self.logger.log_activity_time(
|
|
711
|
+
self.logger.log_activity_time(
|
|
712
|
+
f"compiling federate model '{self.name}'", start,
|
|
713
|
+
additional_data={
|
|
714
|
+
"activity": "compiling data model",
|
|
715
|
+
"model_name": self.name,
|
|
716
|
+
"model_type": self.model_type.value
|
|
717
|
+
}
|
|
718
|
+
)
|
|
620
719
|
|
|
621
720
|
if not recurse:
|
|
622
721
|
return
|
|
@@ -636,17 +735,22 @@ class FederateModel(QueryModel):
|
|
|
636
735
|
query = compiled_query.query
|
|
637
736
|
|
|
638
737
|
def create_table(local_conn: duckdb.DuckDBPyConnection):
|
|
639
|
-
|
|
640
|
-
|
|
738
|
+
# DuckDB doesn't support specifying named parameters that are not used in the query, so filtering them out
|
|
739
|
+
placeholder_exists = lambda key: re.search(r"\$" + key + r"(?!\w)", query)
|
|
740
|
+
existing_placeholders = {key: value for key, value in placeholders.items() if placeholder_exists(key)}
|
|
641
741
|
|
|
642
742
|
create_query = self.model_config.get_sql_for_create(self.name, query)
|
|
743
|
+
self._log_sql_to_run(create_query, existing_placeholders)
|
|
643
744
|
try:
|
|
644
745
|
return local_conn.execute(create_query, existing_placeholders)
|
|
645
746
|
except duckdb.CatalogException as e:
|
|
646
|
-
|
|
747
|
+
if self.name == "__fake_target":
|
|
748
|
+
raise InvalidInputError(409, "invalid_sql_query", f"Provided SQL query depends on static data models that cannot be found. Try building the Virtual Data Lake (VDL) first.")
|
|
749
|
+
else:
|
|
750
|
+
raise InvalidInputError(409, f'dependent_data_model_not_found', f'Model "{self.name}" depends on static data models that cannot be found. Try building the Virtual Data Lake (VDL) first.')
|
|
647
751
|
except Exception as e:
|
|
648
752
|
if self.name == "__fake_target":
|
|
649
|
-
raise InvalidInputError(
|
|
753
|
+
raise InvalidInputError(400, "invalid_sql_query", f"Failed to run provided SQL query")
|
|
650
754
|
else:
|
|
651
755
|
raise FileExecutionError(f'Failed to run federate sql model "{self.name}"', e) from e
|
|
652
756
|
|
|
@@ -673,7 +777,14 @@ class FederateModel(QueryModel):
|
|
|
673
777
|
else:
|
|
674
778
|
raise NotImplementedError(f"Query type not supported: {self.query_file.__class__.__name__}")
|
|
675
779
|
|
|
676
|
-
self.logger.log_activity_time(
|
|
780
|
+
self.logger.log_activity_time(
|
|
781
|
+
f"running federate model '{self.name}'", start,
|
|
782
|
+
additional_data={
|
|
783
|
+
"activity": "running data model",
|
|
784
|
+
"model_name": self.name,
|
|
785
|
+
"model_type": self.model_type.value
|
|
786
|
+
}
|
|
787
|
+
)
|
|
677
788
|
|
|
678
789
|
await super().run_model(conn, placeholders)
|
|
679
790
|
|
|
@@ -703,7 +814,12 @@ class BuildModel(StaticModel, QueryModel):
|
|
|
703
814
|
}
|
|
704
815
|
|
|
705
816
|
def ref_for_build(dependent_model_name: str) -> str:
|
|
706
|
-
dependent_model = self._ref_for_sql(dependent_model_name,
|
|
817
|
+
dependent_model = self._ref_for_sql(dependent_model_name, models_dict)
|
|
818
|
+
dep = models_dict[dependent_model]
|
|
819
|
+
if isinstance(dep, SourceModel) and not dep.model_config.load_to_vdl:
|
|
820
|
+
conn_name = dep.model_config.get_connection()
|
|
821
|
+
table_name = dep.model_config.get_table()
|
|
822
|
+
return f"db_{conn_name}.{table_name}"
|
|
707
823
|
return dependent_model
|
|
708
824
|
|
|
709
825
|
kwargs["ref"] = ref_for_build
|
|
@@ -756,7 +872,14 @@ class BuildModel(StaticModel, QueryModel):
|
|
|
756
872
|
else:
|
|
757
873
|
raise NotImplementedError(f"Query type not supported: {self.query_file.__class__.__name__}")
|
|
758
874
|
|
|
759
|
-
self.logger.log_activity_time(
|
|
875
|
+
self.logger.log_activity_time(
|
|
876
|
+
f"compiling build model '{self.name}'", start,
|
|
877
|
+
additional_data={
|
|
878
|
+
"activity": "compiling data model",
|
|
879
|
+
"model_name": self.name,
|
|
880
|
+
"model_type": self.model_type.value
|
|
881
|
+
}
|
|
882
|
+
)
|
|
760
883
|
|
|
761
884
|
dependencies = self.model_config.depends_on
|
|
762
885
|
self.wait_count_for_build = len(dependencies)
|
|
@@ -771,14 +894,17 @@ class BuildModel(StaticModel, QueryModel):
|
|
|
771
894
|
def create_table():
|
|
772
895
|
create_query = self.model_config.get_sql_for_build(self.name, query)
|
|
773
896
|
local_conn = conn.cursor()
|
|
897
|
+
# local_conn = conn
|
|
774
898
|
try:
|
|
775
|
-
return u.run_duckdb_stmt(self.logger, local_conn, create_query)
|
|
899
|
+
return u.run_duckdb_stmt(self.logger, local_conn, create_query, model_name=self.name)
|
|
776
900
|
except Exception as e:
|
|
777
901
|
raise FileExecutionError(f'Failed to build static sql model "{self.name}"', e) from e
|
|
778
902
|
finally:
|
|
779
903
|
local_conn.close()
|
|
904
|
+
# pass
|
|
780
905
|
|
|
781
|
-
await asyncio.to_thread(create_table)
|
|
906
|
+
# await asyncio.to_thread(create_table)
|
|
907
|
+
create_table() # without threading
|
|
782
908
|
|
|
783
909
|
async def _build_python_model(self, compiled_query: mq.PyModelQuery, conn: duckdb.DuckDBPyConnection) -> None:
|
|
784
910
|
query_result = await asyncio.to_thread(compiled_query.query)
|
|
@@ -786,7 +912,8 @@ class BuildModel(StaticModel, QueryModel):
|
|
|
786
912
|
query_result = pl.from_pandas(query_result).lazy()
|
|
787
913
|
if self.needs_python_df_for_build:
|
|
788
914
|
self.result = query_result.lazy()
|
|
789
|
-
await asyncio.to_thread(self._create_table_from_df, conn, query_result)
|
|
915
|
+
# await asyncio.to_thread(self._create_table_from_df, conn, query_result)
|
|
916
|
+
self._create_table_from_df(conn, query_result) # without threading
|
|
790
917
|
|
|
791
918
|
async def build_model(self, conn: duckdb.DuckDBPyConnection, full_refresh: bool) -> None:
|
|
792
919
|
start = time.time()
|
|
@@ -799,24 +926,33 @@ class BuildModel(StaticModel, QueryModel):
|
|
|
799
926
|
def load_df(conn: duckdb.DuckDBPyConnection, dep_model: DataModel):
|
|
800
927
|
if dep_model.result is None:
|
|
801
928
|
local_conn = conn.cursor()
|
|
929
|
+
# local_conn = conn
|
|
802
930
|
try:
|
|
803
931
|
dep_model.result = dep_model._load_duckdb_view_to_python_df(local_conn)
|
|
804
932
|
finally:
|
|
805
933
|
local_conn.close()
|
|
934
|
+
# pass
|
|
806
935
|
|
|
807
936
|
coroutines = []
|
|
808
937
|
for dep_model in self.upstreams_for_build.values():
|
|
809
938
|
coro = asyncio.to_thread(load_df, conn, dep_model)
|
|
810
939
|
coroutines.append(coro)
|
|
811
940
|
await u.asyncio_gather(coroutines)
|
|
812
|
-
|
|
941
|
+
|
|
813
942
|
# Then run the model's Python function to build the model
|
|
814
943
|
await self._build_python_model(self.compiled_query, conn)
|
|
815
944
|
else:
|
|
816
945
|
raise NotImplementedError(f"Query type not supported: {self.query_file.__class__.__name__}")
|
|
817
946
|
|
|
818
947
|
print(f"[{u.get_current_time()}] ✅ FINISHED: build model '{self.name}'")
|
|
819
|
-
self.logger.log_activity_time(
|
|
948
|
+
self.logger.log_activity_time(
|
|
949
|
+
f"building static build model '{self.name}' into VDL", start,
|
|
950
|
+
additional_data={
|
|
951
|
+
"activity": "building data model into VDL",
|
|
952
|
+
"model_name": self.name,
|
|
953
|
+
"model_type": self.model_type.value
|
|
954
|
+
}
|
|
955
|
+
)
|
|
820
956
|
|
|
821
957
|
await super().build_model(conn, full_refresh)
|
|
822
958
|
|
|
@@ -826,7 +962,7 @@ class DAG:
|
|
|
826
962
|
dataset: DatasetConfig | None
|
|
827
963
|
target_model: DataModel
|
|
828
964
|
models_dict: dict[str, DataModel]
|
|
829
|
-
|
|
965
|
+
datalake_db_path: str | None = field(default=None)
|
|
830
966
|
logger: u.Logger = field(default_factory=lambda: u.Logger(""))
|
|
831
967
|
parameter_set: ParameterSet | None = field(default=None, init=False) # set in apply_selections
|
|
832
968
|
placeholders: dict[str, Any] = field(init=False, default_factory=dict)
|
|
@@ -835,36 +971,50 @@ class DAG:
|
|
|
835
971
|
return f" for dataset '{self.dataset.name}'" if self.dataset else ""
|
|
836
972
|
|
|
837
973
|
def compile_build_models(self, conn_args: ConnectionsArgs) -> None:
|
|
838
|
-
static_models: dict[str, StaticModel] = {
|
|
974
|
+
static_models: dict[str, StaticModel] = {
|
|
975
|
+
k: v for k, v in self.models_dict.items() if isinstance(v, StaticModel)
|
|
976
|
+
}
|
|
839
977
|
for model in static_models.values():
|
|
840
978
|
if isinstance(model, BuildModel):
|
|
841
979
|
model.compile_for_build(conn_args, static_models)
|
|
842
980
|
|
|
843
981
|
def apply_selections(
|
|
844
|
-
self, param_cfg_set: ParameterConfigsSet, user:
|
|
982
|
+
self, param_cfg_set: ParameterConfigsSet, user: AbstractUser, selections: dict[str, str]
|
|
845
983
|
) -> None:
|
|
846
984
|
start = time.time()
|
|
985
|
+
|
|
847
986
|
dataset_params = self.dataset.parameters if self.dataset else None
|
|
848
987
|
parameter_set = param_cfg_set.apply_selections(dataset_params, selections, user)
|
|
849
988
|
self.parameter_set = parameter_set
|
|
850
989
|
msg_extension = self._get_msg_extension()
|
|
851
|
-
|
|
990
|
+
|
|
991
|
+
dataset_name = self.dataset.name if self.dataset else None
|
|
992
|
+
self.logger.log_activity_time(
|
|
993
|
+
"applying selections" + msg_extension, start,
|
|
994
|
+
additional_data={"activity": "applying selections", "dataset_name": dataset_name}
|
|
995
|
+
)
|
|
852
996
|
|
|
853
997
|
def _compile_context(
|
|
854
|
-
self, param_args: ParametersArgs, context_func: ContextFunc, user:
|
|
998
|
+
self, param_args: ParametersArgs, context_func: ContextFunc, user: AbstractUser, configurables: dict[str, str]
|
|
855
999
|
) -> tuple[dict[str, Any], ContextArgs]:
|
|
856
1000
|
start = time.time()
|
|
1001
|
+
|
|
857
1002
|
context = {}
|
|
858
1003
|
assert isinstance(self.parameter_set, ParameterSet)
|
|
859
1004
|
prms = self.parameter_set.get_parameters_as_dict()
|
|
860
|
-
|
|
861
|
-
args = ContextArgs(param_args, user, prms, traits)
|
|
1005
|
+
args = ContextArgs(param_args, user, prms, configurables)
|
|
862
1006
|
msg_extension = self._get_msg_extension()
|
|
1007
|
+
|
|
863
1008
|
try:
|
|
864
1009
|
context_func(context, args)
|
|
865
1010
|
except Exception as e:
|
|
866
1011
|
raise FileExecutionError(f'Failed to run {c.CONTEXT_FILE}' + msg_extension, e) from e
|
|
867
|
-
|
|
1012
|
+
|
|
1013
|
+
dataset_name = self.dataset.name if self.dataset else None
|
|
1014
|
+
self.logger.log_activity_time(
|
|
1015
|
+
"running context.py" + msg_extension, start,
|
|
1016
|
+
additional_data={"activity": "running context.py", "dataset_name": dataset_name}
|
|
1017
|
+
)
|
|
868
1018
|
return context, args
|
|
869
1019
|
|
|
870
1020
|
def _compile_models(self, context: dict[str, Any], ctx_args: ContextArgs, recurse: bool) -> None:
|
|
@@ -875,29 +1025,25 @@ class DAG:
|
|
|
875
1025
|
terminal_nodes = self.target_model.get_terminal_nodes(set())
|
|
876
1026
|
for model in self.models_dict.values():
|
|
877
1027
|
model.confirmed_no_cycles = False
|
|
878
|
-
self.logger.log_activity_time(
|
|
1028
|
+
self.logger.log_activity_time("validating no cycles in model dependencies", start)
|
|
879
1029
|
return terminal_nodes
|
|
880
1030
|
|
|
1031
|
+
def _attach_connections_with_type_duckdb(self, conn: duckdb.DuckDBPyConnection) -> None:
|
|
1032
|
+
for conn_name, connection in self.target_model.conn_set.get_connections_as_dict().items():
|
|
1033
|
+
if not isinstance(connection, ConnectionProperties):
|
|
1034
|
+
continue
|
|
1035
|
+
attach_uri = connection.attach_uri_for_duckdb
|
|
1036
|
+
if attach_uri is None:
|
|
1037
|
+
continue
|
|
1038
|
+
attach_stmt = f"ATTACH IF NOT EXISTS '{attach_uri}' AS db_{conn_name} (READ_ONLY)"
|
|
1039
|
+
u.run_duckdb_stmt(self.logger, conn, attach_stmt, redacted_values=[attach_uri])
|
|
1040
|
+
|
|
881
1041
|
async def _run_models(self) -> None:
|
|
882
1042
|
terminal_nodes = self._get_terminal_nodes()
|
|
883
1043
|
|
|
884
|
-
|
|
885
|
-
try:
|
|
886
|
-
conn = duckdb.connect(self.duckdb_filepath)
|
|
887
|
-
conn.close()
|
|
888
|
-
except duckdb.IOException as e:
|
|
889
|
-
# unable to create duckdb venv file means it's in use and already exists
|
|
890
|
-
# do not throw error here since attaching in read-only mode later may still work
|
|
891
|
-
pass
|
|
892
|
-
|
|
893
|
-
conn = u.create_duckdb_connection()
|
|
1044
|
+
conn = u.create_duckdb_connection(datalake_db_path=self.datalake_db_path)
|
|
894
1045
|
try:
|
|
895
|
-
|
|
896
|
-
try:
|
|
897
|
-
conn.execute(f"ATTACH '{self.duckdb_filepath}' AS venv {read_only}")
|
|
898
|
-
except duckdb.IOException as e:
|
|
899
|
-
self.logger.warn(f"Unable to attach to duckdb venv file: {self.duckdb_filepath}")
|
|
900
|
-
raise e
|
|
1046
|
+
self._attach_connections_with_type_duckdb(conn)
|
|
901
1047
|
|
|
902
1048
|
coroutines = []
|
|
903
1049
|
for model_name in terminal_nodes:
|
|
@@ -909,18 +1055,18 @@ class DAG:
|
|
|
909
1055
|
conn.close()
|
|
910
1056
|
|
|
911
1057
|
async def execute(
|
|
912
|
-
self, param_args: ParametersArgs, param_cfg_set: ParameterConfigsSet, context_func: ContextFunc, user:
|
|
913
|
-
*, runquery: bool = True, recurse: bool = True,
|
|
1058
|
+
self, param_args: ParametersArgs, param_cfg_set: ParameterConfigsSet, context_func: ContextFunc, user: AbstractUser, selections: dict[str, str],
|
|
1059
|
+
*, runquery: bool = True, recurse: bool = True, configurables: dict[str, str] = {}
|
|
914
1060
|
) -> None:
|
|
915
1061
|
recurse = (recurse or runquery)
|
|
916
1062
|
|
|
917
1063
|
self.apply_selections(param_cfg_set, user, selections)
|
|
918
1064
|
|
|
919
|
-
context, ctx_args = self._compile_context(param_args, context_func, user,
|
|
1065
|
+
context, ctx_args = self._compile_context(param_args, context_func, user, configurables)
|
|
920
1066
|
|
|
921
1067
|
self._compile_models(context, ctx_args, recurse)
|
|
922
1068
|
|
|
923
|
-
self.placeholders = ctx_args.
|
|
1069
|
+
self.placeholders = ctx_args._placeholders_copy
|
|
924
1070
|
if runquery:
|
|
925
1071
|
await self._run_models()
|
|
926
1072
|
|
|
@@ -931,39 +1077,24 @@ class DAG:
|
|
|
931
1077
|
self.target_model.retrieve_dependent_query_models(all_model_names)
|
|
932
1078
|
return all_model_names
|
|
933
1079
|
|
|
934
|
-
def
|
|
935
|
-
G = nx.DiGraph()
|
|
936
|
-
|
|
937
|
-
for model_name, model in self.models_dict.items():
|
|
938
|
-
level = model.get_max_path_length_to_target()
|
|
939
|
-
if level is not None:
|
|
940
|
-
G.add_node(model_name, layer=-level, model_type=model.model_type)
|
|
941
|
-
|
|
942
|
-
for model_name in G.nodes:
|
|
943
|
-
model = self.models_dict[model_name]
|
|
944
|
-
for dep_model_name in model.downstreams:
|
|
945
|
-
G.add_edge(model_name, dep_model_name)
|
|
946
|
-
|
|
947
|
-
return G
|
|
948
|
-
|
|
949
|
-
def get_all_data_models(self) -> list[arm.DataModelItem]:
|
|
1080
|
+
def get_all_data_models(self) -> list[rm.DataModelItem]:
|
|
950
1081
|
data_models = []
|
|
951
1082
|
for model_name, model in self.models_dict.items():
|
|
952
1083
|
is_queryable = model.is_queryable
|
|
953
|
-
data_model =
|
|
1084
|
+
data_model = rm.DataModelItem(name=model_name, model_type=model.model_type.value, config=model.model_config, is_queryable=is_queryable)
|
|
954
1085
|
data_models.append(data_model)
|
|
955
1086
|
return data_models
|
|
956
1087
|
|
|
957
|
-
def get_all_model_lineage(self) -> list[
|
|
1088
|
+
def get_all_model_lineage(self) -> list[rm.LineageRelation]:
|
|
958
1089
|
model_lineage = []
|
|
959
1090
|
for model_name, model in self.models_dict.items():
|
|
960
1091
|
if not isinstance(model, QueryModel):
|
|
961
1092
|
continue
|
|
962
1093
|
for dep_model_name in model.model_config.depends_on:
|
|
963
1094
|
edge_type = "buildtime" if isinstance(model, BuildModel) else "runtime"
|
|
964
|
-
source_model =
|
|
965
|
-
target_model =
|
|
966
|
-
model_lineage.append(
|
|
1095
|
+
source_model = rm.LineageNode(name=dep_model_name, type="model")
|
|
1096
|
+
target_model = rm.LineageNode(name=model_name, type="model")
|
|
1097
|
+
model_lineage.append(rm.LineageRelation(type=edge_type, source=source_model, target=target_model))
|
|
967
1098
|
return model_lineage
|
|
968
1099
|
|
|
969
1100
|
|