squirrels-0.4.1-py3-none-any.whl → squirrels-0.5.0b1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of squirrels might be problematic.

Files changed (80)
  1. squirrels/__init__.py +10 -6
  2. squirrels/_api_response_models.py +93 -44
  3. squirrels/_api_server.py +571 -219
  4. squirrels/_auth.py +451 -0
  5. squirrels/_command_line.py +61 -20
  6. squirrels/_connection_set.py +38 -25
  7. squirrels/_constants.py +44 -34
  8. squirrels/_dashboards_io.py +34 -16
  9. squirrels/_exceptions.py +57 -0
  10. squirrels/_initializer.py +117 -44
  11. squirrels/_manifest.py +124 -62
  12. squirrels/_model_builder.py +111 -0
  13. squirrels/_model_configs.py +74 -0
  14. squirrels/_model_queries.py +52 -0
  15. squirrels/_models.py +860 -354
  16. squirrels/_package_loader.py +8 -4
  17. squirrels/_parameter_configs.py +45 -65
  18. squirrels/_parameter_sets.py +15 -13
  19. squirrels/_project.py +561 -0
  20. squirrels/_py_module.py +4 -3
  21. squirrels/_seeds.py +35 -16
  22. squirrels/_sources.py +106 -0
  23. squirrels/_utils.py +166 -63
  24. squirrels/_version.py +1 -1
  25. squirrels/arguments/init_time_args.py +78 -15
  26. squirrels/arguments/run_time_args.py +62 -101
  27. squirrels/dashboards.py +4 -4
  28. squirrels/data_sources.py +94 -162
  29. squirrels/dataset_result.py +86 -0
  30. squirrels/dateutils.py +4 -4
  31. squirrels/package_data/base_project/.env +30 -0
  32. squirrels/package_data/base_project/.env.example +30 -0
  33. squirrels/package_data/base_project/.gitignore +3 -2
  34. squirrels/package_data/base_project/assets/expenses.db +0 -0
  35. squirrels/package_data/base_project/connections.yml +11 -3
  36. squirrels/package_data/base_project/dashboards/dashboard_example.py +15 -13
  37. squirrels/package_data/base_project/dashboards/dashboard_example.yml +22 -0
  38. squirrels/package_data/base_project/docker/.dockerignore +5 -2
  39. squirrels/package_data/base_project/docker/Dockerfile +3 -3
  40. squirrels/package_data/base_project/docker/compose.yml +1 -1
  41. squirrels/package_data/base_project/duckdb_init.sql +9 -0
  42. squirrels/package_data/base_project/macros/macros_example.sql +15 -0
  43. squirrels/package_data/base_project/models/builds/build_example.py +26 -0
  44. squirrels/package_data/base_project/models/builds/build_example.sql +16 -0
  45. squirrels/package_data/base_project/models/builds/build_example.yml +55 -0
  46. squirrels/package_data/base_project/models/dbviews/dbview_example.sql +12 -22
  47. squirrels/package_data/base_project/models/dbviews/dbview_example.yml +26 -0
  48. squirrels/package_data/base_project/models/federates/federate_example.py +38 -15
  49. squirrels/package_data/base_project/models/federates/federate_example.sql +16 -2
  50. squirrels/package_data/base_project/models/federates/federate_example.yml +65 -0
  51. squirrels/package_data/base_project/models/sources.yml +39 -0
  52. squirrels/package_data/base_project/parameters.yml +36 -21
  53. squirrels/package_data/base_project/pyconfigs/connections.py +6 -11
  54. squirrels/package_data/base_project/pyconfigs/context.py +20 -33
  55. squirrels/package_data/base_project/pyconfigs/parameters.py +19 -21
  56. squirrels/package_data/base_project/pyconfigs/user.py +23 -0
  57. squirrels/package_data/base_project/seeds/seed_categories.yml +15 -0
  58. squirrels/package_data/base_project/seeds/seed_subcategories.csv +15 -15
  59. squirrels/package_data/base_project/seeds/seed_subcategories.yml +21 -0
  60. squirrels/package_data/base_project/squirrels.yml.j2 +17 -40
  61. squirrels/parameters.py +20 -20
  62. {squirrels-0.4.1.dist-info → squirrels-0.5.0b1.dist-info}/METADATA +31 -32
  63. squirrels-0.5.0b1.dist-info/RECORD +70 -0
  64. {squirrels-0.4.1.dist-info → squirrels-0.5.0b1.dist-info}/WHEEL +1 -1
  65. squirrels-0.5.0b1.dist-info/entry_points.txt +3 -0
  66. {squirrels-0.4.1.dist-info → squirrels-0.5.0b1.dist-info/licenses}/LICENSE +1 -1
  67. squirrels/_authenticator.py +0 -85
  68. squirrels/_environcfg.py +0 -84
  69. squirrels/package_data/assets/favicon.ico +0 -0
  70. squirrels/package_data/assets/index.css +0 -1
  71. squirrels/package_data/assets/index.js +0 -58
  72. squirrels/package_data/base_project/dashboards.yml +0 -10
  73. squirrels/package_data/base_project/env.yml +0 -29
  74. squirrels/package_data/base_project/models/dbviews/dbview_example.py +0 -47
  75. squirrels/package_data/base_project/pyconfigs/auth.py +0 -45
  76. squirrels/package_data/templates/index.html +0 -18
  77. squirrels/project.py +0 -378
  78. squirrels/user_base.py +0 -55
  79. squirrels-0.4.1.dist-info/RECORD +0 -60
  80. squirrels-0.4.1.dist-info/entry_points.txt +0 -4
squirrels/_models.py CHANGED
@@ -1,147 +1,120 @@
  from __future__ import annotations
- from typing import Iterable, Callable, Any
- from dataclasses import dataclass, field
+ from typing import Callable, Any
+ from dataclasses import dataclass, field, KW_ONLY
  from abc import ABCMeta, abstractmethod
  from enum import Enum
  from pathlib import Path
- from sqlalchemy import create_engine, text, Connection
- import asyncio, os, time, pandas as pd, networkx as nx
-
- from . import _constants as c, _utils as u, _py_module as pm
- from .arguments.run_time_args import ContextArgs, ModelDepsArgs, ModelArgs
- from ._authenticator import User
- from ._connection_set import ConnectionSet
- from ._manifest import ManifestConfig, DatasetConfig
+ import asyncio, os, re, time, duckdb, sqlglot
+ import polars as pl, pandas as pd, networkx as nx
+
+ from . import _constants as c, _utils as u, _py_module as pm, _model_queries as mq, _model_configs as mc, _sources as src, _api_response_models as arm
+ from ._exceptions import FileExecutionError, InvalidInputError
+ from .arguments.run_time_args import ContextArgs, ModelArgs, BuildModelArgs
+ from ._auth import BaseUser
+ from ._connection_set import ConnectionsArgs, ConnectionSet, ConnectionProperties
+ from ._manifest import DatasetConfig
  from ._parameter_sets import ParameterConfigsSet, ParametersArgs, ParameterSet

  ContextFunc = Callable[[dict[str, Any], ContextArgs], None]


  class ModelType(Enum):
- DBVIEW = 1
- FEDERATE = 2
- SEED = 3
-
- class _Materialization(Enum):
- TABLE = 0
- VIEW = 1
-
-
- @dataclass
- class _SqlModelConfig:
- ## Applicable for dbview models
- connection_name: str
-
- ## Applicable for federated models
- materialized: _Materialization
-
- def set_attribute(self, *, connection_name: str | None = None, materialized: str | None = None, **kwargs) -> str:
- if connection_name is not None:
- if not isinstance(connection_name, str):
- raise u.ConfigurationError("The 'connection_name' argument of 'config' macro must be a string")
- self.connection_name = connection_name
-
- if materialized is not None:
- if not isinstance(materialized, str):
- raise u.ConfigurationError("The 'materialized' argument of 'config' macro must be a string")
- try:
- self.materialized = _Materialization[materialized.upper()]
- except KeyError as e:
- valid_options = [x.name for x in _Materialization]
- raise u.ConfigurationError(f"The 'materialized' argument value '{materialized}' is not valid. Must be one of: {valid_options}") from e
- return ""
-
- def get_sql_for_create(self, model_name: str, select_query: str) -> str:
- create_prefix = f"CREATE {self.materialized.name} {model_name} AS\n"
- return create_prefix + select_query
-
-
- @dataclass(frozen=True)
- class QueryFile:
- filepath: str
- model_type: ModelType
-
- @dataclass(frozen=True)
- class SqlQueryFile(QueryFile):
- raw_query: str
-
- @dataclass(frozen=True)
- class _RawPyQuery:
- query: Callable[[ModelArgs], pd.DataFrame]
- dependencies_func: Callable[[ModelDepsArgs], Iterable[str]]
-
- @dataclass(frozen=True)
- class PyQueryFile(QueryFile):
- raw_query: _RawPyQuery
-
-
- @dataclass
- class _Query(metaclass=ABCMeta):
- query: Any
-
- @dataclass
- class _WorkInProgress(_Query):
- query: None = field(default=None, init=False)
-
- @dataclass
- class SqlModelQuery(_Query):
- query: str
- config: _SqlModelConfig
-
- @dataclass
- class PyModelQuery(_Query):
- query: Callable[[], pd.DataFrame]
+ SOURCE = "source"
+ DBVIEW = "dbview"
+ FEDERATE = "federate"
+ SEED = "seed"
+ BUILD = "build"


  @dataclass
- class Referable(metaclass=ABCMeta):
+ class DataModel(metaclass=ABCMeta):
  name: str
+ model_config: mc.ModelConfig
  is_target: bool = field(default=False, init=False)

- needs_sql_table: bool = field(default=False, init=False)
- needs_pandas: bool = field(default=False, init=False)
- result: pd.DataFrame | None = field(default=None, init=False, repr=False)
+ result: pl.LazyFrame | None = field(default=None, init=False, repr=False)
+ needs_python_df: bool = field(default=False, init=False)

  wait_count: int = field(default=0, init=False, repr=False)
  confirmed_no_cycles: bool = field(default=False, init=False)
- upstreams: dict[str, Referable] = field(default_factory=dict, init=False, repr=False)
- downstreams: dict[str, Referable] = field(default_factory=dict, init=False, repr=False)
+ upstreams: dict[str, DataModel] = field(default_factory=dict, init=False, repr=False)
+ downstreams: dict[str, DataModel] = field(default_factory=dict, init=False, repr=False)
+
+ _: KW_ONLY
+ logger: u.Logger = field(default_factory=lambda: u.Logger(""))
+ env_vars: dict[str, str] = field(default_factory=dict)
+ conn_set: ConnectionSet = field(default_factory=ConnectionSet)

+ @property
  @abstractmethod
- def get_model_type(self) -> ModelType:
+ def model_type(self) -> ModelType:
  pass

- async def compile(
- self, ctx: dict[str, Any], ctx_args: ContextArgs, placeholders: dict[str, Any], models_dict: dict[str, Referable], recurse: bool
+ @property
+ def is_queryable(self) -> bool:
+ return True
+
+ def compile(
+ self, ctx: dict[str, Any], ctx_args: ContextArgs, models_dict: dict[str, DataModel], recurse: bool
  ) -> None:
  pass

- @abstractmethod
  def get_terminal_nodes(self, depencency_path: set[str]) -> set[str]:
- pass
+ if self.confirmed_no_cycles:
+ return set()

- def _load_pandas_to_table(self, df: pd.DataFrame, conn: Connection) -> None:
- df.to_sql(self.name, conn, index=False)
-
- def _load_table_to_pandas(self, conn: Connection) -> pd.DataFrame:
- query = f"SELECT * FROM {self.name}"
- return pd.read_sql(query, conn)
+ if self.name in depencency_path:
+ raise u.ConfigurationError(f'Cycle found in model dependency graph')
+
+ terminal_nodes = set()
+ if len(self.upstreams) == 0:
+ terminal_nodes.add(self.name)
+ else:
+ new_path = set(depencency_path)
+ new_path.add(self.name)
+ for dep_model in self.upstreams.values():
+ terminal_nodes.update(dep_model.get_terminal_nodes(new_path))
+
+ self.confirmed_no_cycles = True
+ return terminal_nodes
+
+ def _load_duckdb_view_to_python_df(self, conn: duckdb.DuckDBPyConnection, *, use_venv: bool = False) -> pl.LazyFrame:
+ table_name = ("venv." if use_venv else "") + self.name
+ try:
+ return conn.sql(f"FROM {table_name}").pl().lazy()
+ except duckdb.CatalogException as e:
+ raise u.ConfigurationError(f'Failed to load duckdb table or view "{self.name}" to python dataframe') from e

- async def _trigger(self, conn: Connection, placeholders: dict = {}) -> None:
+ def _run_sql_query_on_connection(self, connection_name: str, query: str, placeholders: dict = {}) -> pl.DataFrame:
+ self.logger.info(f"Running sql query on connection '{connection_name}': {query}")
+ return self.conn_set.run_sql_query_from_conn_name(query, connection_name, placeholders)
+
+ async def _trigger(self, conn: duckdb.DuckDBPyConnection, placeholders: dict = {}) -> None:
  self.wait_count -= 1
  if (self.wait_count == 0):
  await self.run_model(conn, placeholders)

- @abstractmethod
- async def run_model(self, conn: Connection, placeholders: dict = {}) -> None:
+ async def run_model(self, conn: duckdb.DuckDBPyConnection, placeholders: dict = {}) -> None:
  coroutines = []
  for model in self.downstreams.values():
  coroutines.append(model._trigger(conn, placeholders))
- await asyncio.gather(*coroutines)
+ await u.asyncio_gather(coroutines)

  def retrieve_dependent_query_models(self, dependent_model_names: set[str]) -> None:
  pass
-
+
+ def _register_all_upstream_python_df_helper(self, conn: duckdb.DuckDBPyConnection, tables_set: set[str]) -> None:
+ if self.result is not None and self.name not in tables_set:
+ conn.register(self.name, self.result)
+ for dep_model in self.upstreams.values():
+ dep_model._register_all_upstream_python_df_helper(conn, tables_set)
+
+ def register_all_upstream_python_df(self, conn: duckdb.DuckDBPyConnection) -> None:
+ show_tables_query = f"SHOW TABLES"
+ tables_df = conn.sql(show_tables_query).pl()
+ tables_set = set(tables_df["name"])
+ self._register_all_upstream_python_df_helper(conn, tables_set)
+
  def get_max_path_length_to_target(self) -> int | None:
  if not hasattr(self, "max_path_len_to_target"):
  path_lengths = []
@@ -154,283 +127,748 @@ class Referable(metaclass=ABCMeta):
  self.max_path_len_to_target = 0 if self.is_target else None
  return self.max_path_len_to_target

+ async def _trigger_build(self, conn: duckdb.DuckDBPyConnection, full_refresh: bool) -> None:
+ pass
+
+ def _create_table_from_df(self, conn: duckdb.DuckDBPyConnection, query_result: pl.LazyFrame | pd.DataFrame):
+ local_conn = conn.cursor()
+ try:
+ local_conn.register("df", query_result)
+ local_conn.execute(f"CREATE OR REPLACE TABLE {self.name} AS SELECT * FROM df")
+ finally:
+ local_conn.close()
+
+ def process_pass_through_columns(self, models_dict: dict[str, DataModel]) -> None:
+ pass
+

  @dataclass
- class Seed(Referable):
- result: pd.DataFrame
+ class StaticModel(DataModel):
+ needs_python_df_for_build: bool = field(default=False, init=False)
+ wait_count_for_build: int = field(default=0, init=False, repr=False)
+ upstreams_for_build: dict[str, StaticModel] = field(default_factory=dict, init=False, repr=False)
+ downstreams_for_build: dict[str, StaticModel] = field(default_factory=dict, init=False, repr=False)
+
+ def get_terminal_nodes_for_build(self, depencency_path: set[str]) -> set[str]:
+ if self.confirmed_no_cycles:
+ return set()
+
+ if self.name in depencency_path:
+ raise u.ConfigurationError(f'Cycle found in model dependency graph')
+
+ terminal_nodes = set()
+ if len(self.upstreams_for_build) == 0:
+ terminal_nodes.add(self.name)
+ else:
+ new_path = set(depencency_path)
+ new_path.add(self.name)
+ for dep_model in self.upstreams_for_build.values():
+ terminal_nodes.update(dep_model.get_terminal_nodes_for_build(new_path))
+
+ self.confirmed_no_cycles = True
+ return terminal_nodes
+
+ def _get_result(self, conn: duckdb.DuckDBPyConnection) -> pl.LazyFrame:
+ local_conn = conn.cursor()
+ try:
+ return self._load_duckdb_view_to_python_df(local_conn, use_venv=True)
+ except Exception as e:
+ raise InvalidInputError(61, f'Model "{self.name}" depends on static data models that cannot be found.')
+ finally:
+ local_conn.close()
+
+ async def run_model(self, conn: duckdb.DuckDBPyConnection, placeholders: dict = {}) -> None:
+ start = time.time()

- def get_model_type(self) -> ModelType:
+ if (self.needs_python_df or self.is_target) and self.result is None:
+ self.result = await asyncio.to_thread(self._get_result, conn)
+
+ self.logger.log_activity_time(f"loading static model '{self.name}'", start)
+
+ await super().run_model(conn, placeholders)
+
+ def compile_for_build(
+ self, conn_args: ConnectionsArgs, models_dict: dict[str, StaticModel]
+ ) -> None:
+ pass
+
+ async def _trigger_build(self, conn: duckdb.DuckDBPyConnection, full_refresh: bool) -> None:
+ self.wait_count_for_build -= 1
+ if (self.wait_count_for_build == 0):
+ await self.build_model(conn, full_refresh)
+
+ async def build_model(self, conn: duckdb.DuckDBPyConnection, full_refresh: bool) -> None:
+ if self.needs_python_df and self.result is None:
+ local_conn = conn.cursor()
+ try:
+ self.result = await asyncio.to_thread(self._load_duckdb_view_to_python_df, local_conn)
+ finally:
+ local_conn.close()
+
+ coroutines = []
+ for model in self.downstreams_for_build.values():
+ coroutines.append(model._trigger_build(conn, full_refresh))
+ await u.asyncio_gather(coroutines)
+
+
+ @dataclass
+ class Seed(StaticModel):
+ model_config: mc.SeedConfig
+ result: pl.LazyFrame
+
+ @property
+ def model_type(self) -> ModelType:
  return ModelType.SEED
+
+ async def build_model(self, conn: duckdb.DuckDBPyConnection, full_refresh: bool) -> None:
+ start = time.time()

- def get_terminal_nodes(self, depencency_path: set[str]) -> set[str]:
- return {self.name}
+ print(f"[{u.get_current_time()}] 🔨 BUILDING: seed model '{self.name}'")
+ await asyncio.to_thread(self._create_table_from_df, conn, self.result)
+
+ print(f"[{u.get_current_time()}] ✅ FINISHED: seed model '{self.name}'")
+ self.logger.log_activity_time(f"building seed model '{self.name}' to venv", start)
+
+ await super().build_model(conn, full_refresh)
+
+
+ @dataclass
+ class SourceModel(StaticModel):
+ model_config: src.Source
+
+ @property
+ def model_type(self) -> ModelType:
+ return ModelType.SOURCE

- async def run_model(self, conn: Connection, placeholders: dict = {}) -> None:
- if self.needs_sql_table:
- await asyncio.to_thread(self._load_pandas_to_table, self.result, conn)
- await super().run_model(conn, placeholders)
+ @property
+ def is_queryable(self) -> bool:
+ return self.model_config.load_to_duckdb
+
+ def _build_source_model(self, conn: duckdb.DuckDBPyConnection, full_refresh: bool) -> None:
+ local_conn = conn.cursor()
+ try:
+ source = self.model_config
+ conn_name = source.get_connection()
+
+ connection_props = self.conn_set.get_connection(conn_name)
+ if isinstance(connection_props, ConnectionProperties):
+ dialect = connection_props.dialect
+ else:
+ raise u.ConfigurationError(f'Unable to use connection "{conn_name}" for source "{self.name}"')
+
+ result = u.run_duckdb_stmt(self.logger, local_conn, f"FROM (SHOW DATABASES) WHERE database_name = 'db_{conn_name}'").fetchone()
+ if result is None:
+ return # skip this source if connection is not attached
+
+ table_name = source.get_table()
+ new_table_name = self.name

+ if len(source.columns) == 0:
+ stmt = f"CREATE OR REPLACE TABLE {new_table_name} AS SELECT * FROM db_{conn_name}.{table_name}"
+ u.run_duckdb_stmt(self.logger, local_conn, stmt)
+ return
+
+ increasing_column = source.update_hints.increasing_column
+ recreate_table = full_refresh or increasing_column is None
+ if recreate_table:
+ u.run_duckdb_stmt(self.logger, local_conn, f"DROP TABLE IF EXISTS {new_table_name}")
+
+ create_table_cols_clause = source.get_cols_for_create_table_stmt()
+ stmt = f"CREATE TABLE IF NOT EXISTS {new_table_name} ({create_table_cols_clause})"
+ u.run_duckdb_stmt(self.logger, local_conn, stmt)
+
+ if not recreate_table:
+ if source.update_hints.selective_overwrite_value is not None:
+ stmt = f"DELETE FROM {new_table_name} WHERE {increasing_column} >= $value"
+ u.run_duckdb_stmt(self.logger, local_conn, stmt, params={"value": source.update_hints.selective_overwrite_value})
+ elif not source.update_hints.strictly_increasing:
+ stmt = f"DELETE FROM {new_table_name} WHERE {increasing_column} = ({source.get_max_incr_col_query(new_table_name)})"
+ u.run_duckdb_stmt(self.logger, local_conn, stmt)
+
+ max_val_of_incr_col = None
+ if increasing_column is not None:
+ max_val_of_incr_col_tuple = u.run_duckdb_stmt(self.logger, local_conn, source.get_max_incr_col_query(new_table_name)).fetchone()
+ max_val_of_incr_col = max_val_of_incr_col_tuple[0] if isinstance(max_val_of_incr_col_tuple, tuple) else None
+ if max_val_of_incr_col is None:
+ recreate_table = True
+
+ insert_cols_clause = source.get_cols_for_insert_stmt()
+ insert_replace_clause = source.get_insert_replace_clause()
+ query = source.get_query_for_insert(dialect, conn_name, table_name, max_val_of_incr_col, full_refresh=recreate_table)
+ stmt = f"INSERT {insert_replace_clause} INTO {new_table_name} ({insert_cols_clause}) {query}"
+ u.run_duckdb_stmt(self.logger, local_conn, stmt)
+ finally:
+ local_conn.close()
+
+ async def build_model(self, conn: duckdb.DuckDBPyConnection, full_refresh: bool) -> None:
+ if self.model_config.load_to_duckdb:
+ start = time.time()
+ print(f"[{u.get_current_time()}] 🔨 BUILDING: source model '{self.name}'")
+
+ await asyncio.to_thread(self._build_source_model, conn, full_refresh)
+
+ print(f"[{u.get_current_time()}] ✅ FINISHED: source model '{self.name}'")
+ self.logger.log_activity_time(f"building source model '{self.name}' to venv", start)
+
+ await super().build_model(conn, full_refresh)
+

  @dataclass
- class Model(Referable):
- query_file: QueryFile
- manifest_cfg: ManifestConfig
- conn_set: ConnectionSet
- logger: u.Logger = field(default_factory=lambda: u.Logger(""))
+ class QueryModel(DataModel):
+ model_config: mc.QueryModelConfig
+ query_file: mq.QueryFile
+ compiled_query: mq.Query | None = field(default=None, init=False)
+ _: KW_ONLY
  j2_env: u.j2.Environment = field(default_factory=lambda: u.j2.Environment(loader=u.j2.FileSystemLoader(".")))
- compiled_query: _Query | None = field(default=None, init=False)
-
- def get_model_type(self) -> ModelType:
- return self.query_file.model_type

- def _add_upstream(self, other: Referable) -> None:
+ def _add_upstream(self, other: DataModel) -> None:
  self.upstreams[other.name] = other
  other.downstreams[self.name] = self

- if isinstance(self.query_file, SqlQueryFile):
- other.needs_sql_table = True
- elif isinstance(self.query_file, PyQueryFile):
- other.needs_pandas = True
-
- def _get_dbview_conn_name(self) -> str:
- dbview_config = self.manifest_cfg.dbviews.get(self.name)
- if dbview_config is None or dbview_config.connection_name is None:
- return self.manifest_cfg.settings.get(c.DB_CONN_DEFAULT_USED_SETTING, c.DEFAULT_DB_CONN)
- return dbview_config.connection_name
-
- def _get_materialized(self) -> _Materialization:
- federate_config = self.manifest_cfg.federates.get(self.name)
- if federate_config is None or federate_config.materialized is None:
- materialized = self.manifest_cfg.settings.get(c.DEFAULT_MATERIALIZE_SETTING, c.DEFAULT_MATERIALIZE)
- else:
- materialized = federate_config.materialized
- return _Materialization[materialized.upper()]
-
- async def _compile_sql_model(
- self, ctx: dict[str, Any], ctx_args: ContextArgs, placeholders: dict[str, Any], models_dict: dict[str, Referable]
- ) -> tuple[SqlModelQuery, set]:
- assert isinstance(self.query_file, SqlQueryFile)
-
- connection_name = self._get_dbview_conn_name()
- materialized = self._get_materialized()
- configuration = _SqlModelConfig(connection_name, materialized)
- is_placeholder = lambda placeholder: placeholder in placeholders
+ if isinstance(self.query_file, mq.PyQueryFile):
+ other.needs_python_df = True
+
+ def _ref_for_sql(self, dependent_model_name: str, models_dict: dict[str, DataModel]) -> str:
+ if dependent_model_name not in models_dict:
+ raise u.ConfigurationError(f'Model "{self.name}" references unknown model "{dependent_model_name}"')
+
+ dep_model = models_dict[dependent_model_name]
+ if isinstance(dep_model, SourceModel) and not dep_model.model_config.load_to_duckdb:
+ raise u.ConfigurationError(
+ f'Model "{self.name}" cannot reference source model "{dependent_model_name}" which has load_to_duckdb=False'
+ )
+
+ self.model_config.depends_on.add(dependent_model_name)
+ return dependent_model_name
+
+ def _ref_for_python(self, dependent_model_name: str) -> pl.LazyFrame:
+ if dependent_model_name not in self.upstreams:
+ raise u.ConfigurationError(f'Model "{self.name}" must include model "{dependent_model_name}" as a dependency to use')
+ df = self.upstreams[dependent_model_name].result
+ assert df is not None
+ return df
+
+ def _get_compile_sql_model_args_from_ctx_args(
+ self, ctx: dict[str, Any], ctx_args: ContextArgs
+ ) -> dict[str, Any]:
+ is_placeholder = lambda placeholder: placeholder in ctx_args.placeholders
  kwargs = {
  "proj_vars": ctx_args.proj_vars, "env_vars": ctx_args.env_vars, "user": ctx_args.user, "prms": ctx_args.prms,
  "traits": ctx_args.traits, "ctx": ctx, "is_placeholder": is_placeholder, "set_placeholder": ctx_args.set_placeholder,
- "config": configuration.set_attribute, "param_exists": ctx_args.param_exists
+ "param_exists": ctx_args.param_exists
  }
- dependencies = set()
- if self.query_file.model_type == ModelType.FEDERATE:
- def ref(dependent_model_name):
- if dependent_model_name not in models_dict:
- raise u.ConfigurationError(f'Model "{self.name}" references unknown model "{dependent_model_name}"')
- dependencies.add(dependent_model_name)
- return dependent_model_name
- kwargs["ref"] = ref
-
+ return kwargs
+
+ def _get_compiled_sql_query_str(self, raw_query: str, kwargs: dict[str, Any]) -> str:
  try:
- template = self.j2_env.from_string(self.query_file.raw_query)
- query = await asyncio.to_thread(template.render, kwargs)
+ template = self.j2_env.from_string(raw_query)
+ query = template.render(kwargs)
  except Exception as e:
- raise u.FileExecutionError(f'Failed to compile sql model "{self.name}"', e) from e
+ raise FileExecutionError(f'Failed to compile sql model "{self.name}"', e) from e
+ return query
+
+ def process_pass_through_columns(self, models_dict: dict[str, DataModel]) -> None:
+ if getattr(self, "processed_pass_through_columns", False):
+ return
+
+ for col in self.model_config.columns:
+ if col.pass_through:
+ # Validate pass-through column has exactly one dependency
+ if len(col.depends_on) != 1:
+ raise u.ConfigurationError(
+ f'Column "{self.name}.{col.name}" has pass_through=true, which must have exactly one depends_on value'
+ )
+
+ # Get the upstream column reference
+ upstream_col_ref = next(iter(col.depends_on))
+ table_name, col_name = upstream_col_ref.split('.')
+ self.model_config.depends_on.add(table_name)
+
+ # Get the upstream model
+ if table_name not in models_dict:
+ raise u.ConfigurationError(
+ f'Column "{self.name}.{col.name}" depends on unknown model "{table_name}"'
+ )
+
+ # Do not rely on self.upstreams here, as it may not be fully populated for metadata passthrough purposes
+ for dep_model_name in self.model_config.depends_on:
+ dep_model = models_dict[dep_model_name]
+ dep_model.process_pass_through_columns(models_dict)
+
+ for col in self.model_config.columns:
+ if col.pass_through:
+ upstream_col_ref = next(iter(col.depends_on))
+ table_name, col_name = upstream_col_ref.split('.')
+ upstream_model = models_dict[table_name]
+
+ # Find the upstream column config
+ upstream_col = next(
+ (c for c in upstream_model.model_config.columns if c.name == col_name),
+ None
+ )
+ if upstream_col is None:
+ raise u.ConfigurationError(
+ f'Column "{self.name}.{col.name}" depends on unknown column "{upstream_col_ref}"'
+ )
+
+ # Copy metadata from upstream column
+ col.type = upstream_col.type if col.type == "" else col.type
+ col.condition = upstream_col.condition if col.condition == "" else col.condition
+ col.description = upstream_col.description if col.description == "" else col.description
+ col.category = upstream_col.category if col.category == mc.ColumnCategory.MISC else col.category
+
+ self.processed_pass_through_columns = True

- compiled_query = SqlModelQuery(query, configuration)
- return compiled_query, dependencies
+ def retrieve_dependent_query_models(self, dependent_model_names: set[str]) -> None:
+ if self.name not in dependent_model_names:
+ dependent_model_names.add(self.name)
+ for dep_model in self.upstreams.values():
+ dep_model.retrieve_dependent_query_models(dependent_model_names)
+
+
+ @dataclass
+ class DbviewModel(QueryModel):
+ model_config: mc.DbviewModelConfig
+ query_file: mq.SqlQueryFile
+ compiled_query: mq.SqlModelQuery | None = field(default=None, init=False)
+ sources: dict[str, src.Source] = field(default_factory=dict, init=False)
+
+ @property
+ def model_type(self) -> ModelType:
+ return ModelType.DBVIEW
+
+ def _get_compile_sql_model_args(
+ self, ctx: dict[str, Any], ctx_args: ContextArgs, models_dict: dict[str, DataModel]
+ ) -> dict[str, Any]:
+ kwargs = self._get_compile_sql_model_args_from_ctx_args(ctx, ctx_args)
+
+ def source(source_name: str) -> str:
+ if source_name not in models_dict or not isinstance(source_model := models_dict[source_name], SourceModel):
+ raise u.ConfigurationError(f'Dbview "{self.name}" references unknown source "{source_name}"')
+ if source_model.model_config.get_connection() != self.model_config.get_connection():
+ raise u.ConfigurationError(f'Dbview "{self.name}" references source "{source_name}" with different connection')
+
+ # Check if the source model has load_to_duckdb=False but this dbview has translate_to_duckdb=True
+ if not source_model.model_config.load_to_duckdb and self.model_config.translate_to_duckdb:
+ raise u.ConfigurationError(
+ f'Dbview "{self.name}" with translate_to_duckdb=True cannot reference source "{source_name}" '
+ f'which has load_to_duckdb=False'
+ )
+
+ self.model_config.depends_on.add(source_name)
+ self.sources[source_name] = source_model.model_config
+ return "{{ source(\"" + source_name + "\") }}"
+
+ kwargs["source"] = source
+ return kwargs
+
+ def _get_duckdb_query(self, read_dialect: str, query: str) -> str:
+ kwargs = {
+ "source": lambda source_name: "venv." + source_name
+ }
+ compiled_query = self._get_compiled_sql_query_str(query, kwargs)
+ return sqlglot.transpile(compiled_query, read=read_dialect, write="duckdb")[0]

- async def _compile_python_model(
- self, ctx: dict[str, Any], ctx_args: ContextArgs, placeholders: dict[str, Any], models_dict: dict[str, Referable]
- ) -> tuple[PyModelQuery, Iterable]:
- assert isinstance(self.query_file, PyQueryFile)
+ def _compile_sql_model(self, kwargs: dict[str, Any]) -> mq.SqlModelQuery:
+ compiled_query_str = self._get_compiled_sql_query_str(self.query_file.raw_query, kwargs)
+
+ connection_name = self.model_config.get_connection()
+ connection_props = self.conn_set.get_connection(connection_name)

- sqrl_args = ModelDepsArgs(
- ctx_args.proj_vars, ctx_args.env_vars, ctx_args.user, ctx_args.prms, ctx_args.traits, placeholders, ctx
- )
- try:
- dependencies = await asyncio.to_thread(self.query_file.raw_query.dependencies_func, sqrl_args)
- for dependent_model_name in dependencies:
- if dependent_model_name not in models_dict:
- raise u.ConfigurationError(f'Model "{self.name}" references unknown model "{dependent_model_name}"')
- except Exception as e:
- raise u.FileExecutionError(f'Failed to run "{c.DEP_FUNC}" function for python model "{self.name}"', e) from e
+ if self.model_config.translate_to_duckdb and isinstance(connection_props, ConnectionProperties):
+ macros = {
+ "source": lambda source_name: "venv." + source_name
+ }
+ compiled_query2 = self._get_compiled_sql_query_str(compiled_query_str, macros)
+ compiled_query_str = self._get_duckdb_query(connection_props.dialect, compiled_query2)
+ is_duckdb = True
+ else:
+ macros = {
+ "source": lambda source_name: self.sources[source_name].get_table()
+ }
+ compiled_query_str = self._get_compiled_sql_query_str(compiled_query_str, macros)
+ is_duckdb = False
+
+ compiled_query = mq.SqlModelQuery(compiled_query_str, is_duckdb)
+ return compiled_query
+
+ def compile(
+ self, ctx: dict[str, Any], ctx_args: ContextArgs, models_dict: dict[str, DataModel], recurse: bool
+ ) -> None:
+ if self.compiled_query is not None:
+ return
+ else:
+ self.compiled_query = mq.WorkInProgress() # type: ignore

- dbview_conn_name = self._get_dbview_conn_name()
- connections = self.conn_set.get_engines_as_dict()
+ start = time.time()

- def ref(dependent_model_name):
- if dependent_model_name not in self.upstreams:
- raise u.ConfigurationError(f'Model "{self.name}" must include model "{dependent_model_name}" as a dependency to use')
- return pd.DataFrame(self.upstreams[dependent_model_name].result)
+ kwargs = self._get_compile_sql_model_args(ctx, ctx_args, models_dict)
+ self.compiled_query = self._compile_sql_model(kwargs)

- def run_external_sql(sql_query: str, connection_name: str | None):
- connection_name = dbview_conn_name if connection_name is None else connection_name
- return self.conn_set.run_sql_query_from_conn_name(sql_query, connection_name, placeholders)
+ self.logger.log_activity_time(f"compiling dbview model '{self.name}'", start)
+
+ async def _run_sql_model(self, conn: duckdb.DuckDBPyConnection, placeholders: dict = {}) -> None:
+ assert self.compiled_query is not None
+ is_duckdb = self.compiled_query.is_duckdb
+ query = self.compiled_query.query
+ connection_name = self.model_config.get_connection()

- use_duckdb = self.manifest_cfg.settings_obj.do_use_duckdb()
- sqrl_args = ModelArgs(
- ctx_args.proj_vars, ctx_args.env_vars, ctx_args.user, ctx_args.prms, ctx_args.traits, placeholders, ctx,
- dbview_conn_name, connections, dependencies, ref, run_external_sql, use_duckdb
- )
+ def run_sql_query_on_connection(is_duckdb: bool, query: str, placeholders: dict) -> pl.DataFrame:
+ try:
+ if is_duckdb:
+ local_conn = conn.cursor()
+ try:
+ self.logger.info(f"Running duckdb query: {query}")
+ return local_conn.sql(query, params=placeholders).pl()
+ except duckdb.CatalogException as e:
+ raise InvalidInputError(61, f'Model "{self.name}" depends on static data models that cannot be found.')
+ except Exception as e:
+ raise RuntimeError(e)
+ finally:
+ local_conn.close()
+ else:
+ return self._run_sql_query_on_connection(connection_name, query, placeholders)
+ except RuntimeError as e:
+ raise FileExecutionError(f'Failed to run dbview sql model "{self.name}"', e)
+
+ result = await asyncio.to_thread(run_sql_query_on_connection, is_duckdb, query, placeholders)
+ self.result = result.lazy()
+
+ async def run_model(self, conn: duckdb.DuckDBPyConnection, placeholders: dict = {}) -> None:
+ start = time.time()
+
+ await self._run_sql_model(conn, placeholders)
+
+ self.logger.log_activity_time(f"running dbview model '{self.name}'", start)
+
+ await super().run_model(conn, placeholders)
+
+
+ @dataclass
+ class FederateModel(QueryModel):
+ model_config: mc.FederateModelConfig
+ query_file: mq.SqlQueryFile | mq.PyQueryFile
+ compiled_query: mq.SqlModelQuery | mq.PyModelQuery | None = field(default=None, init=False)
+
+ @property
+ def model_type(self) -> ModelType:
+ return ModelType.FEDERATE
+
+ def _get_compile_sql_model_args(
+ self, ctx: dict[str, Any], ctx_args: ContextArgs, models_dict: dict[str, DataModel]
+ ) -> dict[str, Any]:
+ kwargs = self._get_compile_sql_model_args_from_ctx_args(ctx, ctx_args)
+
+ def ref(dependent_model_name: str) -> str:
+ dependent_model = self._ref_for_sql(dependent_model_name, models_dict)
+ prefix = "venv." if isinstance(models_dict[dependent_model], (SourceModel, BuildModel)) else ""
+ return prefix + dependent_model
+
+ kwargs["ref"] = ref
+ return kwargs
+
+ def _compile_sql_model(
+ self, query_file: mq.SqlQueryFile, ctx: dict[str, Any], ctx_args: ContextArgs, models_dict: dict[str, DataModel]
+ ) -> mq.SqlModelQuery:
+ kwargs = self._get_compile_sql_model_args(ctx, ctx_args, models_dict)
+ compiled_query_str = self._get_compiled_sql_query_str(query_file.raw_query, kwargs)
+ compiled_query = mq.SqlModelQuery(compiled_query_str, is_duckdb=True)
+ return compiled_query
+
+ def _get_python_model_args(self, ctx: dict[str, Any], ctx_args: ContextArgs) -> ModelArgs:
+ dependencies = self.model_config.depends_on
+ connections = self.conn_set.get_connections_as_dict()
+
+ def run_external_sql(connection_name: str, sql_query: str) -> pl.DataFrame:
+ return self._run_sql_query_on_connection(connection_name, sql_query, ctx_args.placeholders)
+
+ conn_args = ConnectionsArgs(ctx_args.project_path, ctx_args.proj_vars, ctx_args.env_vars)
+ build_model_args = BuildModelArgs(conn_args, connections, dependencies, self._ref_for_python, run_external_sql)
+ return ModelArgs(ctx_args, build_model_args, ctx)
+
+ def _compile_python_model(
+ self, query_file: mq.PyQueryFile, ctx: dict[str, Any], ctx_args: ContextArgs
+ ) -> mq.PyModelQuery:
+ sqrl_args = self._get_python_model_args(ctx, ctx_args)

- def compiled_query():
+ def compiled_query() -> pl.LazyFrame | pd.DataFrame:
  try:
- assert isinstance(self.query_file, PyQueryFile)
- raw_query: _RawPyQuery = self.query_file.raw_query
- return raw_query.query(sqrl_args)
+ return query_file.raw_query(sqrl_args)
  except Exception as e:
- raise u.FileExecutionError(f'Failed to run "{c.MAIN_FUNC}" function for python model "{self.name}"', e) from e
+ raise FileExecutionError(f'Failed to run "{c.MAIN_FUNC}" function for python model "{self.name}"', e) from e

- return PyModelQuery(compiled_query), dependencies
-
- async def compile(
- self, ctx: dict[str, Any], ctx_args: ContextArgs, placeholders: dict[str, Any], models_dict: dict[str, Referable], recurse: bool
+ return mq.PyModelQuery(compiled_query)
+
+ def compile(
+ self, ctx: dict[str, Any], ctx_args: ContextArgs, models_dict: dict[str, DataModel], recurse: bool
  ) -> None:
  if self.compiled_query is not None:
  return
  else:
- self.compiled_query = _WorkInProgress()
+ self.compiled_query = mq.WorkInProgress() # type: ignore

  start = time.time()

- if isinstance(self.query_file, SqlQueryFile):
- compiled_query, dependencies = await self._compile_sql_model(ctx, ctx_args, placeholders, models_dict)
- elif isinstance(self.query_file, PyQueryFile):
- compiled_query, dependencies = await self._compile_python_model(ctx, ctx_args, placeholders, models_dict)
+ if isinstance(self.query_file, mq.SqlQueryFile):
+ self.compiled_query = self._compile_sql_model(self.query_file, ctx, ctx_args, models_dict)
+ elif isinstance(self.query_file, mq.PyQueryFile):
+ self.compiled_query = self._compile_python_model(self.query_file, ctx, ctx_args)
  else:
  raise NotImplementedError(f"Query type not supported: {self.query_file.__class__.__name__}")

- self.compiled_query = compiled_query
- self.wait_count = len(set(dependencies))
-
- model_type = self.get_model_type().name.lower()
- self.logger.log_activity_time(f"compiling {model_type} model '{self.name}'", start)
+ self.logger.log_activity_time(f"compiling federate model '{self.name}'", start)

  if not recurse:
  return

- dep_models = [models_dict[x] for x in dependencies]
- coroutines = []
- for dep_model in dep_models:
+ dependencies = self.model_config.depends_on
+ self.wait_count = len(dependencies)
+
+ for name in dependencies:
+ dep_model = models_dict[name]
  self._add_upstream(dep_model)
- coro = dep_model.compile(ctx, ctx_args, placeholders, models_dict, recurse)
- coroutines.append(coro)
- await asyncio.gather(*coroutines)
-
- def get_terminal_nodes(self, depencency_path: set[str]) -> set[str]:
- if self.confirmed_no_cycles:
- return set()
-
- if self.name in depencency_path:
- raise u.ConfigurationError(f'Cycle found in model dependency graph')
+ dep_model.compile(ctx, ctx_args, models_dict, recurse)

- terminal_nodes = set()
- if len(self.upstreams) == 0:
- terminal_nodes.add(self.name)
- else:
- new_path = set(depencency_path)
- new_path.add(self.name)
- for dep_model in self.upstreams.values():
- terminal_nodes_under_dep = dep_model.get_terminal_nodes(new_path)
- terminal_nodes = terminal_nodes.union(terminal_nodes_under_dep)
-
- self.confirmed_no_cycles = True
- return terminal_nodes
+ async def _run_sql_model(self, compiled_query: mq.SqlModelQuery, conn: duckdb.DuckDBPyConnection, placeholders: dict = {}) -> None:
+ local_conn = conn.cursor()
+ try:
+ self.register_all_upstream_python_df(local_conn)
+ query = compiled_query.query

- async def _run_sql_model(self, conn: Connection, placeholders: dict = {}) -> None:
- assert(isinstance(self.compiled_query, SqlModelQuery))
- config = self.compiled_query.config
- query = self.compiled_query.query
+ def create_table(local_conn: duckdb.DuckDBPyConnection):
+ placeholer_exists = lambda key: re.search(r"\$" + key + r"(?!\w)", query)
+ existing_placeholders = {key: value for key, value in placeholders.items() if placeholer_exists(key)}

- if self.query_file.model_type == ModelType.DBVIEW:
- def run_sql_query():
- try:
- return self.conn_set.run_sql_query_from_conn_name(query, config.connection_name, placeholders)
- except RuntimeError as e:
- raise u.FileExecutionError(f'Failed to run dbview sql model "{self.name}"', e) from e
-
- df = await asyncio.to_thread(run_sql_query)
- await asyncio.to_thread(self._load_pandas_to_table, df, conn)
- if self.needs_pandas or self.is_target:
- self.result = df
- elif self.query_file.model_type == ModelType.FEDERATE:
- def create_table():
- create_query = config.get_sql_for_create(self.name, query)
+ create_query = self.model_config.get_sql_for_create(self.name, query)
  try:
- return conn.execute(text(create_query), placeholders)
+ return local_conn.execute(create_query, existing_placeholders)
+ except duckdb.CatalogException as e:
+ raise InvalidInputError(61, f'Model "{self.name}" depends on static data models that cannot be found.')
  except Exception as e:
- raise u.FileExecutionError(f'Failed to run federate sql model "{self.name}"', e) from e
+ if self.name == "__fake_target":
+ raise InvalidInputError(204, f"Failed to run provided SQL query")
+ else:
+ raise FileExecutionError(f'Failed to run federate sql model "{self.name}"', e) from e

- await asyncio.to_thread(create_table)
- if self.needs_pandas or self.is_target:
- self.result = await asyncio.to_thread(self._load_table_to_pandas, conn)
-
- async def _run_python_model(self, conn: Connection) -> None:
- assert(isinstance(self.compiled_query, PyModelQuery))
+ await asyncio.to_thread(create_table, local_conn)
+ if self.needs_python_df or self.is_target:
+ self.result = await asyncio.to_thread(self._load_duckdb_view_to_python_df, local_conn)
+ finally:
+ local_conn.close()
+
+ async def _run_python_model(self, compiled_query: mq.PyModelQuery) -> None:
+ query_result = await asyncio.to_thread(compiled_query.query)
+ if isinstance(query_result, pd.DataFrame):
+ query_result = pl.from_pandas(query_result)
+
+ self.result = query_result.lazy()

- df = await asyncio.to_thread(self.compiled_query.query)
- if self.needs_sql_table:
- await asyncio.to_thread(self._load_pandas_to_table, df, conn)
- if self.needs_pandas or self.is_target:
- self.result = df
-
- async def run_model(self, conn: Connection, placeholders: dict = {}) -> None:
+ async def run_model(self, conn: duckdb.DuckDBPyConnection, placeholders: dict = {}) -> None:
  start = time.time()

- if isinstance(self.query_file, SqlQueryFile):
- await self._run_sql_model(conn, placeholders)
- elif isinstance(self.query_file, PyQueryFile):
- await self._run_python_model(conn)
+ if isinstance(self.compiled_query, mq.SqlModelQuery):
+ await self._run_sql_model(self.compiled_query, conn, placeholders)
+ elif isinstance(self.compiled_query, mq.PyModelQuery):
+ await self._run_python_model(self.compiled_query)
  else:
  raise NotImplementedError(f"Query type not supported: {self.query_file.__class__.__name__}")

- model_type = self.get_model_type().name.lower()
- self.logger.log_activity_time(f"running {model_type} model '{self.name}'", start)
+ self.logger.log_activity_time(f"running federate model '{self.name}'", start)

  await super().run_model(conn, placeholders)

- def retrieve_dependent_query_models(self, dependent_model_names: set[str]) -> None:
- if self.name not in dependent_model_names:
- dependent_model_names.add(self.name)
- for dep_model in self.upstreams.values():
- dep_model.retrieve_dependent_query_models(dependent_model_names)
+
+ @dataclass
+ class BuildModel(StaticModel, QueryModel):
+ model_config: mc.BuildModelConfig
+ query_file: mq.SqlQueryFile | mq.PyQueryFile
+ compiled_query: mq.SqlModelQuery | mq.PyModelQuery | None = field(default=None, init=False)
+
+ @property
+ def model_type(self) -> ModelType:
+ return ModelType.BUILD
+
+ def _add_upstream_for_build(self, other: StaticModel) -> None:
+ self.upstreams_for_build[other.name] = other
+ other.downstreams_for_build[self.name] = self
+
+ if isinstance(self.query_file, mq.PyQueryFile):
+ other.needs_python_df_for_build = True
+
+ def _get_compile_sql_model_args(
+ self, conn_args: ConnectionsArgs, models_dict: dict[str, StaticModel]
+ ) -> dict[str, Any]:
+ kwargs: dict[str, Any] = {
+ "proj_vars": conn_args.proj_vars, "env_vars": conn_args.env_vars
+ }
+
+ def ref_for_build(dependent_model_name: str) -> str:
+ dependent_model = self._ref_for_sql(dependent_model_name, dict(models_dict))
+ return dependent_model
+
+ kwargs["ref"] = ref_for_build
+ return kwargs
+
+ def _compile_sql_model(
+ self, query_file: mq.SqlQueryFile, conn_args: ConnectionsArgs, models_dict: dict[str, StaticModel]
+ ) -> mq.SqlModelQuery:
+ kwargs = self._get_compile_sql_model_args(conn_args, models_dict)
+ compiled_query_str = self._get_compiled_sql_query_str(query_file.raw_query, kwargs)
+ compiled_query = mq.SqlModelQuery(compiled_query_str, is_duckdb=True)
+ return compiled_query
+
+ def _ref_for_python(self, dependent_model_name: str) -> pl.LazyFrame:
+ if dependent_model_name not in self.upstreams_for_build:
+ raise u.ConfigurationError(f'Model "{self.name}" must include model "{dependent_model_name}" as a dependency to use')
+ df = self.upstreams_for_build[dependent_model_name].result
+ assert df is not None
+ return df
+
+ def _get_compile_python_model_args(self, conn_args: ConnectionsArgs) -> BuildModelArgs:
+
+ def run_external_sql(connection_name: str, sql_query: str):
+ return self._run_sql_query_on_connection(connection_name, sql_query)
+
+ return BuildModelArgs(
+ conn_args, self.conn_set.get_connections_as_dict(), self.model_config.depends_on, self._ref_for_python, run_external_sql
+ )
+
+ def _compile_python_model(
+ self, query_file: mq.PyQueryFile, conn_args: ConnectionsArgs
+ ) -> mq.PyModelQuery:
+ sqrl_args = self._get_compile_python_model_args(conn_args)
+
+ def compiled_query() -> pl.LazyFrame | pd.DataFrame:
+ try:
+ return query_file.raw_query(sqrl_args)
+ except Exception as e:
+ raise FileExecutionError(f'Failed to run "{c.MAIN_FUNC}" function for build model "{self.name}"', e)
+
+ return mq.PyModelQuery(compiled_query)
+
+ def compile_for_build(self, conn_args: ConnectionsArgs, models_dict: dict[str, StaticModel]) -> None:
+ start = time.time()
+
+ if isinstance(self.query_file, mq.SqlQueryFile):
+ self.compiled_query = self._compile_sql_model(self.query_file, conn_args, models_dict)
+ elif isinstance(self.query_file, mq.PyQueryFile):
+ self.compiled_query = self._compile_python_model(self.query_file, conn_args)
+ else:
+ raise NotImplementedError(f"Query type not supported: {self.query_file.__class__.__name__}")
+
+ self.logger.log_activity_time(f"compiling build model '{self.name}'", start)
+
+ dependencies = self.model_config.depends_on
+ self.wait_count_for_build = len(dependencies)
+
+ for name in dependencies:
+ dep_model = models_dict[name]
+ self._add_upstream_for_build(dep_model)
+
+ async def _build_sql_model(self, compiled_query: mq.SqlModelQuery, conn: duckdb.DuckDBPyConnection) -> None:
+ query = compiled_query.query
+
+ def create_table():
+ create_query = self.model_config.get_sql_for_build(self.name, query)
+ local_conn = conn.cursor()
+ try:
+ return u.run_duckdb_stmt(self.logger, local_conn, create_query)
+ except Exception as e:
+ raise FileExecutionError(f'Failed to build static sql model "{self.name}"', e) from e
+ finally:
+ local_conn.close()
+
+ await asyncio.to_thread(create_table)
+
+ async def _build_python_model(self, compiled_query: mq.PyModelQuery, conn: duckdb.DuckDBPyConnection) -> None:
+ query_result = await asyncio.to_thread(compiled_query.query)
+ if isinstance(query_result, pd.DataFrame):
+ query_result = pl.from_pandas(query_result).lazy()
+ if self.needs_python_df_for_build:
+ self.result = query_result.lazy()
+ await asyncio.to_thread(self._create_table_from_df, conn, query_result)
+
+ async def build_model(self, conn: duckdb.DuckDBPyConnection, full_refresh: bool) -> None:
+ start = time.time()
+ print(f"[{u.get_current_time()}] 🔨 BUILDING: build model '{self.name}'")
+
+ if isinstance(self.compiled_query, mq.SqlModelQuery):
+ await self._build_sql_model(self.compiled_query, conn)
+ elif isinstance(self.compiled_query, mq.PyModelQuery):
+ # First ensure all upstream models have an associated Python dataframe
+ def load_df(conn: duckdb.DuckDBPyConnection, dep_model: DataModel):
+ if dep_model.result is None:
+ local_conn = conn.cursor()
+ try:
+ dep_model.result = dep_model._load_duckdb_view_to_python_df(local_conn)
+ finally:
+ local_conn.close()
+
+ coroutines = []
+ for dep_model in self.upstreams_for_build.values():
+ coro = asyncio.to_thread(load_df, conn, dep_model)
+ coroutines.append(coro)
+ await u.asyncio_gather(coroutines)
+
+ # Then run the model's Python function to build the model
+ await self._build_python_model(self.compiled_query, conn)
+ else:
+ raise NotImplementedError(f"Query type not supported: {self.query_file.__class__.__name__}")
+
+ print(f"[{u.get_current_time()}] ✅ FINISHED: build model '{self.name}'")
+ self.logger.log_activity_time(f"building static build model '{self.name}'", start)
+
+ await super().build_model(conn, full_refresh)


  @dataclass
  class DAG:
- manifest_cfg: ManifestConfig
- dataset: DatasetConfig
- target_model: Referable
- models_dict: dict[str, Referable]
+ dataset: DatasetConfig | None
+ target_model: DataModel
+ models_dict: dict[str, DataModel]
+ duckdb_filepath: str = field(default="")
  logger: u.Logger = field(default_factory=lambda: u.Logger(""))
  parameter_set: ParameterSet | None = field(default=None, init=False) # set in apply_selections
  placeholders: dict[str, Any] = field(init=False, default_factory=dict)

+ def _get_msg_extension(self) -> str:
+ return f" for dataset '{self.dataset.name}'" if self.dataset else ""
+
+ def compile_build_models(self, conn_args: ConnectionsArgs) -> None:
+ static_models: dict[str, StaticModel] = {k: v for k, v in self.models_dict.items() if isinstance(v, StaticModel)}
+ for model in static_models.values():
+ if isinstance(model, BuildModel):
+ model.compile_for_build(conn_args, static_models)
+
  def apply_selections(
- self, param_cfg_set: ParameterConfigsSet, user: User | None, selections: dict[str, str], *, updates_only: bool = False, request_version: int | None = None
+ self, param_cfg_set: ParameterConfigsSet, user: BaseUser | None, selections: dict[str, str]
  ) -> None:
  start = time.time()
- dataset_params = self.dataset.parameters
- parameter_set = param_cfg_set.apply_selections(
- dataset_params, selections, user, updates_only=updates_only, request_version=request_version
- )
+ dataset_params = self.dataset.parameters if self.dataset else None
+ parameter_set = param_cfg_set.apply_selections(dataset_params, selections, user)
  self.parameter_set = parameter_set
- self.logger.log_activity_time(f"applying selections for dataset '{self.dataset.name}'", start)
+ msg_extension = self._get_msg_extension()
+ self.logger.log_activity_time("applying selections" + msg_extension, start)

- def _compile_context(self, param_args: ParametersArgs, context_func: ContextFunc, user: User | None) -> tuple[dict[str, Any], ContextArgs]:
+ def _compile_context(
+ self, param_args: ParametersArgs, context_func: ContextFunc, user: BaseUser | None, default_traits: dict[str, Any]
+ ) -> tuple[dict[str, Any], ContextArgs]:
  start = time.time()
  context = {}
  assert isinstance(self.parameter_set, ParameterSet)
  prms = self.parameter_set.get_parameters_as_dict()
- args = ContextArgs(param_args.proj_vars, param_args.env_vars, user, prms, self.dataset.traits, self.placeholders)
+ traits = self.dataset.traits if self.dataset else default_traits
+ args = ContextArgs(param_args, user, prms, traits)
+ msg_extension = self._get_msg_extension()
  try:
  context_func(context, args)
  except Exception as e:
- raise u.FileExecutionError(f'Failed to run {c.CONTEXT_FILE} for dataset "{self.dataset.name}"', e) from e
- self.logger.log_activity_time(f"running context.py for dataset '{self.dataset.name}'", start)
+ raise FileExecutionError(f'Failed to run {c.CONTEXT_FILE}' + msg_extension, e) from e
+ self.logger.log_activity_time("running context.py" + msg_extension, start)
  return context, args

- async def _compile_models(self, context: dict[str, Any], ctx_args: ContextArgs, recurse: bool) -> None:
- await self.target_model.compile(context, ctx_args, self.placeholders, self.models_dict, recurse)
+ def _compile_models(self, context: dict[str, Any], ctx_args: ContextArgs, recurse: bool) -> None:
+ self.target_model.compile(context, ctx_args, self.models_dict, recurse)

  def _get_terminal_nodes(self) -> set[str]:
  start = time.time()
@@ -440,39 +878,53 @@ class DAG:
         self.logger.log_activity_time(f"validating no cycles in model dependencies", start)
         return terminal_nodes

-    async def _run_models(self, terminal_nodes: set[str], placeholders: dict = {}) -> None:
-        use_duckdb = self.manifest_cfg.settings_obj.do_use_duckdb()
-        conn_url = "duckdb:///" if use_duckdb else "sqlite:///?check_same_thread=False"
-        engine = create_engine(conn_url)
+    async def _run_models(self) -> None:
+        terminal_nodes = self._get_terminal_nodes()
+
+        # create an empty duckdb venv file if it does not exist
+        try:
+            conn = duckdb.connect(self.duckdb_filepath)
+            conn.close()
+        except duckdb.IOException as e:
+            # unable to create duckdb venv file means it's in use and already exists
+            # do not throw error here since attaching in read-only mode later may still work
+            pass

-        with engine.connect() as conn:
+        conn = u.create_duckdb_connection()
+        try:
+            read_only = "(READ_ONLY)" if self.duckdb_filepath else ""
+            try:
+                conn.execute(f"ATTACH '{self.duckdb_filepath}' AS venv {read_only}")
+            except duckdb.IOException as e:
+                self.logger.warning(f"Unable to attach to duckdb venv file: {self.duckdb_filepath}")
+                raise e
+
             coroutines = []
             for model_name in terminal_nodes:
-                model = self.models_dict[model_name]
-                coroutines.append(model.run_model(conn, placeholders))
-            await asyncio.gather(*coroutines)
-
-            engine.dispose()
+                model = self.models_dict[model_name] if model_name != "__fake_target" else self.target_model
+                coroutines.append(model.run_model(conn, self.placeholders))
+            await u.asyncio_gather(coroutines)
+
+        finally:
+            conn.close()
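The new _run_models above opens a fresh DuckDB connection and attaches the pre-built "venv" database file, read-only when a file path is configured, so every terminal model runs against the same attached catalog. A minimal sketch of that attach pattern with the duckdb Python API (the file path and table name are placeholders, and squirrels itself goes through u.create_duckdb_connection rather than duckdb.connect directly):

import duckdb

venv_path = "target/venv.duckdb"  # placeholder path to a previously built DuckDB file

conn = duckdb.connect()  # fresh in-memory connection for this run
try:
    # Attach the build artifacts read-only so a long-running server and a
    # concurrent rebuild can both use the same file without write conflicts.
    conn.execute(f"ATTACH '{venv_path}' AS venv (READ_ONLY)")
    print(conn.execute("SELECT count(*) FROM venv.my_table").fetchall())
finally:
    conn.close()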
 
     async def execute(
-        self, param_args: ParametersArgs, param_cfg_set: ParameterConfigsSet, context_func: ContextFunc, user: User | None, selections: dict[str, str],
-        *, request_version: int | None = None, runquery: bool = True, recurse: bool = True
-    ) -> dict[str, Any]:
+        self, param_args: ParametersArgs, param_cfg_set: ParameterConfigsSet, context_func: ContextFunc, user: BaseUser | None, selections: dict[str, str],
+        *, runquery: bool = True, recurse: bool = True, default_traits: dict[str, Any] = {}
+    ) -> None:
         recurse = (recurse or runquery)

-        self.apply_selections(param_cfg_set, user, selections, request_version=request_version)
+        self.apply_selections(param_cfg_set, user, selections)

-        context, ctx_args = self._compile_context(param_args, context_func, user)
+        context, ctx_args = self._compile_context(param_args, context_func, user, default_traits)

-        await self._compile_models(context, ctx_args, recurse)
+        self._compile_models(context, ctx_args, recurse)

-        terminal_nodes = self._get_terminal_nodes()
-
-        placeholders = ctx_args._placeholders.copy()
+        self.placeholders = ctx_args.placeholders
         if runquery:
-            await self._run_models(terminal_nodes, placeholders)
-
-        return placeholders
+            await self._run_models()
+
+        self.target_model.process_pass_through_columns(self.models_dict)

     def get_all_query_models(self) -> set[str]:
         all_model_names = set()
@@ -483,10 +935,9 @@ class DAG:
         G = nx.DiGraph()

         for model_name, model in self.models_dict.items():
-            model_type = model.get_model_type()
             level = model.get_max_path_length_to_target()
             if level is not None:
-                G.add_node(model_name, layer=-level, model_type=model_type)
+                G.add_node(model_name, layer=-level, model_type=model.model_type)

         for model_name in G.nodes:
             model = self.models_dict[model_name]
@@ -494,46 +945,101 @@ class DAG:
                 G.add_edge(model_name, dep_model_name)

         return G
+
+    def get_all_data_models(self) -> list[arm.DataModelItem]:
+        data_models = []
+        for model_name, model in self.models_dict.items():
+            is_queryable = model.is_queryable
+            data_model = arm.DataModelItem(name=model_name, model_type=model.model_type.value, config=model.model_config, is_queryable=is_queryable)
+            data_models.append(data_model)
+        return data_models
+
+    def get_all_model_lineage(self) -> list[arm.LineageRelation]:
+        model_lineage = []
+        for model_name, model in self.models_dict.items():
+            if not isinstance(model, QueryModel):
+                continue
+            for dep_model_name in model.model_config.depends_on:
+                edge_type = "buildtime" if isinstance(model, BuildModel) else "runtime"
+                source_model = arm.LineageNode(name=dep_model_name, type="model")
+                target_model = arm.LineageNode(name=model_name, type="model")
+                model_lineage.append(arm.LineageRelation(type=edge_type, source=source_model, target=target_model))
+        return model_lineage
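The DAG is also exported as a networkx DiGraph whose nodes carry a layer attribute (the negated distance to the target model) and whose edges point from a model to its dependency. A small, hypothetical illustration of how such a layered graph can be positioned for display with networkx's multipartite layout (the model names are made up, and this is not the package's own rendering code):

import networkx as nx

G = nx.DiGraph()
# layer = -(max path length to the target), mirroring the add_node call above
G.add_node("raw_orders", layer=-2, model_type="dbview")
G.add_node("stg_orders", layer=-1, model_type="federate")
G.add_node("orders_report", layer=0, model_type="federate")
G.add_edge("orders_report", "stg_orders")  # edge points from model to its dependency
G.add_edge("stg_orders", "raw_orders")

# Group nodes into columns by their "layer" attribute for a left-to-right DAG view
pos = nx.multipartite_layout(G, subset_key="layer")
print(pos)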
 

 class ModelsIO:

     @classmethod
-    def load_files(cls, logger: u.Logger, base_path: str) -> dict[str, QueryFile]:
-        start = time.time()
-        raw_queries_by_model: dict[str, QueryFile] = {}
-
-        def populate_from_file(dp: str, file: str, model_type: ModelType) -> None:
-            filepath = Path(dp, file)
-            file_stem, extension = os.path.splitext(file)
-            if extension == '.py':
-                module = pm.PyModule(filepath)
-                dependencies_func = module.get_func_or_class(c.DEP_FUNC, default_attr=lambda sqrl: [])
-                raw_query = _RawPyQuery(module.get_func_or_class(c.MAIN_FUNC), dependencies_func)
-                query_file = PyQueryFile(filepath.as_posix(), model_type, raw_query)
-            elif extension == '.sql':
-                query_file = SqlQueryFile(filepath.as_posix(), model_type, filepath.read_text())
-            else:
-                query_file = None
-
-            if query_file is not None:
-                if file_stem in raw_queries_by_model:
-                    conflicts = [raw_queries_by_model[file_stem].filepath, filepath]
-                    raise u.ConfigurationError(f"Multiple models found for '{file_stem}': {conflicts}")
-                raw_queries_by_model[file_stem] = query_file
-
-        def populate_raw_queries_for_type(folder_path: Path, model_type: ModelType) -> None:
-            for dp, _, filenames in os.walk(folder_path):
-                for file in filenames:
-                    populate_from_file(dp, file, model_type)
+    def _load_model_config(cls, filepath: Path, model_type: ModelType, env_vars: dict[str, str]) -> mc.ModelConfig:
+        yaml_path = filepath.with_suffix('.yml')
+        config_dict = u.load_yaml_config(yaml_path) if yaml_path.exists() else {}
+
+        if model_type == ModelType.DBVIEW:
+            config = mc.DbviewModelConfig(**config_dict).finalize_connection(env_vars)
+            return config
+        elif model_type == ModelType.FEDERATE:
+            return mc.FederateModelConfig(**config_dict)
+        elif model_type == ModelType.BUILD:
+            return mc.BuildModelConfig(**config_dict)
+        else:
+            return mc.ModelConfig(**config_dict)
+
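Model configuration now lives in an optional YAML file sharing the model's name next to the .sql or .py file, and _load_model_config dispatches the parsed dict to a model-type-specific config class. A minimal sketch of the sidecar lookup, assuming PyYAML and a plain dict in place of squirrels' typed config classes and its u.load_yaml_config helper (the example path is a placeholder):

from pathlib import Path
import yaml

def load_sidecar_config(model_filepath: str) -> dict:
    # Look for "<model name>.yml" next to the model file;
    # a missing config file simply means "use all defaults".
    yaml_path = Path(model_filepath).with_suffix(".yml")
    if not yaml_path.exists():
        return {}
    with open(yaml_path) as f:
        return yaml.safe_load(f) or {}

# e.g. load_sidecar_config("models/dbviews/my_view.sql")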
+    @classmethod
+    def _populate_from_file(
+        cls, raw_queries_by_model: dict[str, mq.QueryFileWithConfig], dp: str, file: str, model_type: ModelType, env_vars: dict[str, str]
+    ) -> None:
+        filepath = Path(dp, file)
+        file_stem, extension = os.path.splitext(file)
+
+        if extension == '.py':
+            module = pm.PyModule(filepath)
+            raw_query = module.get_func_or_class(c.MAIN_FUNC)
+            query_file = mq.PyQueryFile(filepath.as_posix(), raw_query)
+        elif extension == '.sql':
+            query_file = mq.SqlQueryFile(filepath.as_posix(), filepath.read_text())
+        else:
+            return # Skip files that are not query files

+        if file_stem in raw_queries_by_model:
+            assert isinstance(prior_query_file := raw_queries_by_model[file_stem].query_file, mq.QueryFile)
+            conflicts = [prior_query_file.filepath, query_file.filepath]
+            raise u.ConfigurationError(f"Multiple models found for '{file_stem}': {conflicts}")
+
+        model_config = cls._load_model_config(filepath, model_type, env_vars)
+        raw_queries_by_model[file_stem] = mq.QueryFileWithConfig(query_file, model_config)
+
+    @classmethod
+    def _populate_raw_queries_for_type(
+        cls, folder_path: Path, model_type: ModelType, *, env_vars: dict[str, str] = {}
+    ) -> dict[str, mq.QueryFileWithConfig]:
+        raw_queries_by_model: dict[str, mq.QueryFileWithConfig] = {}
+        for dp, _, filenames in os.walk(folder_path):
+            for file in filenames:
+                cls._populate_from_file(raw_queries_by_model, dp, file, model_type, env_vars)
+        return raw_queries_by_model
+
+    @classmethod
+    def load_build_files(cls, logger: u.Logger, base_path: str) -> dict[str, mq.QueryFileWithConfig]:
+        start = time.time()
+        builds_path = u.Path(base_path, c.MODELS_FOLDER, c.BUILDS_FOLDER)
+        raw_queries_by_model = cls._populate_raw_queries_for_type(builds_path, ModelType.BUILD)
+        logger.log_activity_time("loading build files", start)
+        return raw_queries_by_model
+
+    @classmethod
+    def load_dbview_files(cls, logger: u.Logger, base_path: str, env_vars: dict[str, str]) -> dict[str, mq.QueryFileWithConfig]:
+        start = time.time()
         dbviews_path = u.Path(base_path, c.MODELS_FOLDER, c.DBVIEWS_FOLDER)
-        populate_raw_queries_for_type(dbviews_path, ModelType.DBVIEW)
+        raw_queries_by_model = cls._populate_raw_queries_for_type(dbviews_path, ModelType.DBVIEW, env_vars=env_vars)
+        logger.log_activity_time("loading dbview files", start)
+        return raw_queries_by_model

+    @classmethod
+    def load_federate_files(cls, logger: u.Logger, base_path: str) -> dict[str, mq.QueryFileWithConfig]:
+        start = time.time()
         federates_path = u.Path(base_path, c.MODELS_FOLDER, c.FEDERATES_FOLDER)
-        populate_raw_queries_for_type(federates_path, ModelType.FEDERATE)
-
-        logger.log_activity_time("loading files for models", start)
+        raw_queries_by_model = cls._populate_raw_queries_for_type(federates_path, ModelType.FEDERATE)
+        logger.log_activity_time("loading federate files", start)
         return raw_queries_by_model

     @classmethod