squirrels 0.5.0rc0__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of squirrels might be problematic. Click here for more details.

Files changed (108)
  1. dateutils/__init__.py +6 -0
  2. dateutils/_enums.py +25 -0
  3. squirrels/dateutils.py → dateutils/_implementation.py +58 -111
  4. dateutils/types.py +6 -0
  5. squirrels/__init__.py +10 -12
  6. squirrels/_api_routes/__init__.py +5 -0
  7. squirrels/_api_routes/auth.py +271 -0
  8. squirrels/_api_routes/base.py +171 -0
  9. squirrels/_api_routes/dashboards.py +158 -0
  10. squirrels/_api_routes/data_management.py +148 -0
  11. squirrels/_api_routes/datasets.py +265 -0
  12. squirrels/_api_routes/oauth2.py +298 -0
  13. squirrels/_api_routes/project.py +252 -0
  14. squirrels/_api_server.py +245 -781
  15. squirrels/_arguments/__init__.py +0 -0
  16. squirrels/{arguments → _arguments}/init_time_args.py +7 -2
  17. squirrels/{arguments → _arguments}/run_time_args.py +13 -35
  18. squirrels/_auth.py +720 -212
  19. squirrels/_command_line.py +81 -41
  20. squirrels/_compile_prompts.py +147 -0
  21. squirrels/_connection_set.py +16 -7
  22. squirrels/_constants.py +29 -9
  23. squirrels/{_dashboards_io.py → _dashboards.py} +87 -6
  24. squirrels/_data_sources.py +570 -0
  25. squirrels/{dataset_result.py → _dataset_types.py} +2 -4
  26. squirrels/_exceptions.py +9 -37
  27. squirrels/_initializer.py +83 -59
  28. squirrels/_logging.py +117 -0
  29. squirrels/_manifest.py +129 -62
  30. squirrels/_model_builder.py +10 -52
  31. squirrels/_model_configs.py +3 -3
  32. squirrels/_model_queries.py +1 -1
  33. squirrels/_models.py +249 -118
  34. squirrels/{package_data → _package_data}/base_project/.env +16 -4
  35. squirrels/{package_data → _package_data}/base_project/.env.example +15 -3
  36. squirrels/{package_data → _package_data}/base_project/connections.yml +4 -3
  37. squirrels/{package_data → _package_data}/base_project/dashboards/dashboard_example.py +4 -4
  38. squirrels/_package_data/base_project/dashboards/dashboard_example.yml +22 -0
  39. squirrels/{package_data → _package_data}/base_project/duckdb_init.sql +1 -0
  40. squirrels/_package_data/base_project/macros/macros_example.sql +17 -0
  41. squirrels/{package_data → _package_data}/base_project/models/builds/build_example.py +2 -2
  42. squirrels/{package_data → _package_data}/base_project/models/builds/build_example.sql +1 -1
  43. squirrels/{package_data → _package_data}/base_project/models/builds/build_example.yml +2 -0
  44. squirrels/_package_data/base_project/models/dbviews/dbview_example.sql +17 -0
  45. squirrels/_package_data/base_project/models/dbviews/dbview_example.yml +32 -0
  46. squirrels/_package_data/base_project/models/federates/federate_example.py +48 -0
  47. squirrels/_package_data/base_project/models/federates/federate_example.sql +21 -0
  48. squirrels/{package_data → _package_data}/base_project/models/federates/federate_example.yml +7 -7
  49. squirrels/{package_data → _package_data}/base_project/models/sources.yml +5 -6
  50. squirrels/{package_data → _package_data}/base_project/parameters.yml +32 -45
  51. squirrels/_package_data/base_project/pyconfigs/connections.py +18 -0
  52. squirrels/{package_data → _package_data}/base_project/pyconfigs/context.py +31 -22
  53. squirrels/_package_data/base_project/pyconfigs/parameters.py +141 -0
  54. squirrels/_package_data/base_project/pyconfigs/user.py +44 -0
  55. squirrels/{package_data → _package_data}/base_project/seeds/seed_categories.yml +1 -1
  56. squirrels/{package_data → _package_data}/base_project/seeds/seed_subcategories.yml +1 -1
  57. squirrels/_package_data/base_project/squirrels.yml.j2 +61 -0
  58. squirrels/_package_data/templates/dataset_results.html +112 -0
  59. squirrels/_package_data/templates/oauth_login.html +271 -0
  60. squirrels/_package_data/templates/squirrels_studio.html +20 -0
  61. squirrels/_parameter_configs.py +76 -55
  62. squirrels/_parameter_options.py +348 -0
  63. squirrels/_parameter_sets.py +53 -45
  64. squirrels/_parameters.py +1664 -0
  65. squirrels/_project.py +403 -242
  66. squirrels/_py_module.py +3 -2
  67. squirrels/_request_context.py +33 -0
  68. squirrels/_schemas/__init__.py +0 -0
  69. squirrels/_schemas/auth_models.py +167 -0
  70. squirrels/_schemas/query_param_models.py +75 -0
  71. squirrels/{_api_response_models.py → _schemas/response_models.py} +48 -18
  72. squirrels/_seeds.py +1 -1
  73. squirrels/_sources.py +23 -19
  74. squirrels/_utils.py +121 -39
  75. squirrels/_version.py +1 -1
  76. squirrels/arguments.py +7 -0
  77. squirrels/auth.py +4 -0
  78. squirrels/connections.py +3 -0
  79. squirrels/dashboards.py +2 -81
  80. squirrels/data_sources.py +14 -563
  81. squirrels/parameter_options.py +13 -348
  82. squirrels/parameters.py +14 -1266
  83. squirrels/types.py +16 -0
  84. {squirrels-0.5.0rc0.dist-info → squirrels-0.5.1.dist-info}/METADATA +42 -30
  85. squirrels-0.5.1.dist-info/RECORD +98 -0
  86. squirrels/package_data/base_project/dashboards/dashboard_example.yml +0 -22
  87. squirrels/package_data/base_project/macros/macros_example.sql +0 -15
  88. squirrels/package_data/base_project/models/dbviews/dbview_example.sql +0 -12
  89. squirrels/package_data/base_project/models/dbviews/dbview_example.yml +0 -26
  90. squirrels/package_data/base_project/models/federates/federate_example.py +0 -44
  91. squirrels/package_data/base_project/models/federates/federate_example.sql +0 -17
  92. squirrels/package_data/base_project/pyconfigs/connections.py +0 -14
  93. squirrels/package_data/base_project/pyconfigs/parameters.py +0 -93
  94. squirrels/package_data/base_project/pyconfigs/user.py +0 -23
  95. squirrels/package_data/base_project/squirrels.yml.j2 +0 -71
  96. squirrels-0.5.0rc0.dist-info/RECORD +0 -70
  97. /squirrels/{package_data → _package_data}/base_project/assets/expenses.db +0 -0
  98. /squirrels/{package_data → _package_data}/base_project/assets/weather.db +0 -0
  99. /squirrels/{package_data → _package_data}/base_project/docker/.dockerignore +0 -0
  100. /squirrels/{package_data → _package_data}/base_project/docker/Dockerfile +0 -0
  101. /squirrels/{package_data → _package_data}/base_project/docker/compose.yml +0 -0
  102. /squirrels/{package_data/base_project/.gitignore → _package_data/base_project/gitignore} +0 -0
  103. /squirrels/{package_data → _package_data}/base_project/seeds/seed_categories.csv +0 -0
  104. /squirrels/{package_data → _package_data}/base_project/seeds/seed_subcategories.csv +0 -0
  105. /squirrels/{package_data → _package_data}/base_project/tmp/.gitignore +0 -0
  106. {squirrels-0.5.0rc0.dist-info → squirrels-0.5.1.dist-info}/WHEEL +0 -0
  107. {squirrels-0.5.0rc0.dist-info → squirrels-0.5.1.dist-info}/entry_points.txt +0 -0
  108. {squirrels-0.5.0rc0.dist-info → squirrels-0.5.1.dist-info}/licenses/LICENSE +0 -0
squirrels/_project.py CHANGED
@@ -1,92 +1,113 @@
1
- from dotenv import dotenv_values
2
- from uuid import uuid4
1
+ from dotenv import dotenv_values, load_dotenv
2
+ from pathlib import Path
3
3
  import asyncio, typing as t, functools as ft, shutil, json, os
4
- import logging as l, matplotlib.pyplot as plt, networkx as nx, polars as pl
5
- import sqlglot, sqlglot.expressions
4
+ import sqlglot, sqlglot.expressions, duckdb, polars as pl
6
5
 
7
- from ._auth import Authenticator, BaseUser
6
+ from ._auth import Authenticator, AuthProviderArgs, ProviderFunctionType
7
+ from ._schemas.auth_models import CustomUserFields, AbstractUser, GuestUser, RegisteredUser
8
+ from ._schemas import response_models as rm
8
9
  from ._model_builder import ModelBuilder
9
10
  from ._exceptions import InvalidInputError, ConfigurationError
10
- from . import _utils as u, _constants as c, _manifest as mf, _connection_set as cs, _api_response_models as arm
11
+ from ._py_module import PyModule
12
+ from . import _dashboards as d, _utils as u, _constants as c, _manifest as mf, _connection_set as cs
11
13
  from . import _seeds as s, _models as m, _model_configs as mc, _model_queries as mq, _sources as so
12
- from . import _parameter_sets as ps, _dashboards_io as d, dashboards as dash, dataset_result as dr
14
+ from . import _parameter_sets as ps, _dataset_types as dr, _logging as l
13
15
 
14
- T = t.TypeVar("T", bound=dash.Dashboard)
16
+ T = t.TypeVar("T", bound=d.Dashboard)
15
17
  M = t.TypeVar("M", bound=m.DataModel)
16
18
 
17
19
 
18
- class _CustomJsonFormatter(l.Formatter):
19
- def format(self, record: l.LogRecord) -> str:
20
- super().format(record)
21
- info = {
22
- "timestamp": self.formatTime(record),
23
- "project_id": record.name,
24
- "level": record.levelname,
25
- "message": record.getMessage(),
26
- "thread": record.thread,
27
- "thread_name": record.threadName,
28
- "process": record.process,
29
- **record.__dict__.get("info", {})
30
- }
31
- output = {
32
- "data": record.__dict__.get("data", {}),
33
- "info": info
34
- }
35
- return json.dumps(output)
36
-
37
-
38
20
  class SquirrelsProject:
39
21
  """
40
22
  Initiate an instance of this class to interact with a Squirrels project through Python code. For example this can be handy to experiment with the datasets produced by Squirrels in a Jupyter notebook.
41
23
  """
42
24
 
43
- def __init__(self, *, filepath: str = ".", log_file: str | None = c.LOGS_FILE, log_level: str = "INFO", log_format: str = "text") -> None:
25
+ def __init__(
26
+ self, *, filepath: str = ".", load_dotenv_globally: bool = False,
27
+ log_to_file: bool = False, log_level: str | None = None, log_format: str | None = None,
28
+ ) -> None:
44
29
  """
45
30
  Constructor for SquirrelsProject class. Loads the file contents of the Squirrels project into memory as member fields.
46
31
 
47
32
  Arguments:
48
33
  filepath: The path to the Squirrels project file. Defaults to the current working directory.
49
- log_level: The logging level to use. Options are "DEBUG", "INFO", and "WARNING". Default is "INFO".
50
- log_file: The name of the log file to write to from the "logs/" subfolder. If None or empty string, then file logging is disabled. Default is "squirrels.log".
51
- log_format: The format of the log records. Options are "text" and "json". Default is "text".
34
+ log_level: The logging level to use. Options are "DEBUG", "INFO", and "WARNING". Default is from SQRL_LOGGING__LOG_LEVEL environment variable or "INFO".
35
+ log_to_file: Whether to enable logging to file(s) in the "logs/" folder with rotation and retention policies. Default is False.
36
+ log_format: The format of the log records. Options are "text" and "json". Default is from SQRL_LOGGING__LOG_FORMAT environment variable or "text".
52
37
  """
53
38
  self._filepath = filepath
54
- self._logger = self._get_logger(self._filepath, log_file, log_level, log_format)
55
-
56
- def _get_logger(self, base_path: str, log_file: str | None, log_level: str, log_format: str) -> u.Logger:
57
- logger = u.Logger(name=uuid4().hex)
58
- logger.setLevel(log_level.upper())
59
-
60
- handler = l.StreamHandler()
61
- handler.setLevel("WARNING")
62
- handler.setFormatter(l.Formatter("%(levelname)s: %(asctime)s - %(message)s"))
63
- logger.addHandler(handler)
64
-
65
- if log_format.lower() == "json":
66
- formatter = _CustomJsonFormatter()
67
- elif log_format.lower() == "text":
68
- formatter = l.Formatter("[%(name)s] %(asctime)s - %(levelname)s - %(message)s")
69
- else:
70
- raise ValueError("log_format must be either 'text' or 'json'")
39
+ self._load_dotenv_globally = load_dotenv_globally
40
+ self._logger = self._get_logger(filepath, log_to_file, log_level, log_format)
41
+ self._ensure_virtual_datalake_exists(filepath)
42
+
43
+ def _get_logger(self, filepath: str, log_to_file: bool, log_level: str | None, log_format: str | None) -> u.Logger:
44
+ env_vars = self._env_vars
45
+ # CLI arguments take precedence over environment variables
46
+ log_level = log_level if log_level is not None else env_vars.get(c.SQRL_LOGGING_LOG_LEVEL, "INFO")
47
+ log_format = log_format if log_format is not None else env_vars.get(c.SQRL_LOGGING_LOG_FORMAT, "text")
48
+ log_to_file = log_to_file or u.to_bool(env_vars.get(c.SQRL_LOGGING_LOG_TO_FILE, "false"))
49
+ log_file_size_mb = int(env_vars.get(c.SQRL_LOGGING_LOG_FILE_SIZE_MB, 50))
50
+ log_file_backup_count = int(env_vars.get(c.SQRL_LOGGING_LOG_FILE_BACKUP_COUNT, 1))
51
+ return l.get_logger(filepath, log_to_file, log_level, log_format, log_file_size_mb, log_file_backup_count)
52
+
53
+ def _ensure_virtual_datalake_exists(self, project_path: str) -> None:
54
+ target_path = u.Path(project_path, c.TARGET_FOLDER)
55
+ target_path.mkdir(parents=True, exist_ok=True)
56
+
57
+ # Attempt to set up the virtual data lake with DATA_PATH if possible
58
+ try:
59
+ is_ducklake = self._datalake_db_path.startswith("ducklake:")
71
60
 
72
- if log_file:
73
- path = u.Path(base_path, c.LOGS_FOLDER, log_file)
74
- path.parent.mkdir(parents=True, exist_ok=True)
75
-
76
- handler = l.FileHandler(path)
77
- handler.setFormatter(formatter)
78
- logger.addHandler(handler)
61
+ data_path = self._env_vars.get(c.SQRL_VDL_DATA_PATH, c.DEFAULT_VDL_DATA_PATH)
62
+ data_path = data_path.format(project_path=project_path)
63
+
64
+ options = f"(DATA_PATH '{data_path}')" if is_ducklake else ""
65
+ attach_stmt = f"ATTACH '{self._datalake_db_path}' AS vdl {options}"
66
+ with duckdb.connect() as conn:
67
+ conn.execute(attach_stmt)
68
+ # TODO: support incremental loads for build models and avoid cleaning up old files all the time
69
+ conn.execute("CALL ducklake_expire_snapshots('vdl', older_than => now())")
70
+ conn.execute("CALL ducklake_cleanup_old_files('vdl', cleanup_all => true)")
79
71
 
80
- return logger
72
+ except Exception as e:
73
+ if "DATA_PATH parameter" in str(e):
74
+ first_line = str(e).split("\n")[0]
75
+ note = "NOTE: Squirrels does not allow changing the data path for an existing Virtual Data Lake (VDL)"
76
+ raise u.ConfigurationError(f"{first_line}\n\n{note}")
77
+
78
+ if is_ducklake and not any(x in self._datalake_db_path for x in [":sqlite:", ":postgres:", ":mysql:"]):
79
+ extended_error = "\n Note: if you're using DuckDB for the metadata database, only one process can connect to the VDL at a time."
80
+ else:
81
+ extended_error = ""
82
+
83
+ raise u.ConfigurationError(f"Failed to attach Virtual Data Lake (VDL).{extended_error}") from e
81
84
 
82
85
  @ft.cached_property
83
86
  def _env_vars(self) -> dict[str, str]:
84
87
  dotenv_files = [c.DOTENV_FILE, c.DOTENV_LOCAL_FILE]
85
88
  dotenv_vars = {}
86
89
  for file in dotenv_files:
87
- dotenv_vars.update({k: v for k, v in dotenv_values(f"{self._filepath}/{file}").items() if v is not None})
90
+ full_path = u.Path(self._filepath, file)
91
+ if self._load_dotenv_globally:
92
+ load_dotenv(full_path)
93
+ dotenv_vars.update({k: v for k, v in dotenv_values(full_path).items() if v is not None})
88
94
  return {**os.environ, **dotenv_vars}
89
95
 
96
+ @ft.cached_property
97
+ def _elevated_access_level(self) -> u.ACCESS_LEVEL:
98
+ elevated_access_level = self._env_vars.get(c.SQRL_PERMISSIONS_ELEVATED_ACCESS_LEVEL, "admin").lower()
99
+
100
+ if elevated_access_level not in ["admin", "member", "guest"]:
101
+ raise u.ConfigurationError(f"{c.SQRL_PERMISSIONS_ELEVATED_ACCESS_LEVEL} has been set to an invalid access level: {elevated_access_level}")
102
+
103
+ return elevated_access_level
104
+
105
+ @ft.cached_property
106
+ def _datalake_db_path(self) -> str:
107
+ datalake_db_path = self._env_vars.get(c.SQRL_VDL_CATALOG_DB_PATH, c.DEFAULT_VDL_CATALOG_DB_PATH)
108
+ datalake_db_path = datalake_db_path.format(project_path=self._filepath)
109
+ return datalake_db_path
110
+
90
111
  @ft.cached_property
91
112
  def _manifest_cfg(self) -> mf.ManifestConfig:
92
113
  return mf.ManifestIO.load_from_file(self._logger, self._filepath, self._env_vars)
@@ -127,29 +148,77 @@ class SquirrelsProject:
127
148
  def _conn_set(self) -> cs.ConnectionSet:
128
149
  return cs.ConnectionSetIO.load_from_file(self._logger, self._filepath, self._manifest_cfg, self._conn_args)
129
150
 
151
+ @ft.cached_property
152
+ def _custom_user_fields_cls_and_provider_functions(self) -> tuple[type[CustomUserFields], list[ProviderFunctionType]]:
153
+ user_module_path = u.Path(self._filepath, c.PYCONFIGS_FOLDER, c.USER_FILE)
154
+ user_module = PyModule(user_module_path)
155
+
156
+ # Load CustomUserFields class (adds to Authenticator.providers as side effect)
157
+ CustomUserFieldsCls = user_module.get_func_or_class("CustomUserFields", default_attr=CustomUserFields)
158
+ provider_functions = Authenticator.providers
159
+ Authenticator.providers = []
160
+
161
+ if not issubclass(CustomUserFieldsCls, CustomUserFields):
162
+ raise ConfigurationError(f"CustomUserFields class in '{c.USER_FILE}' must inherit from CustomUserFields")
163
+
164
+ return CustomUserFieldsCls, provider_functions
165
+
166
+ @ft.cached_property
167
+ def _auth_args(self) -> AuthProviderArgs:
168
+ conn_args = self._conn_args
169
+ return AuthProviderArgs(conn_args.project_path, conn_args.proj_vars, conn_args.env_vars)
170
+
130
171
  @ft.cached_property
131
172
  def _auth(self) -> Authenticator:
132
- return Authenticator(self._logger, self._filepath, self._env_vars)
173
+ CustomUserFieldsCls, provider_functions = self._custom_user_fields_cls_and_provider_functions
174
+ external_only = (self._manifest_cfg.authentication.type == mf.AuthenticationType.EXTERNAL)
175
+ return Authenticator(self._logger, self._filepath, self._auth_args, provider_functions, custom_user_fields_cls=CustomUserFieldsCls, external_only=external_only)
176
+
177
+ @ft.cached_property
178
+ def _guest_user(self) -> AbstractUser:
179
+ custom_fields = self._auth.CustomUserFields()
180
+ return GuestUser(username="", custom_fields=custom_fields)
181
+
182
+ @ft.cached_property
183
+ def _admin_user(self) -> AbstractUser:
184
+ custom_fields = self._auth.CustomUserFields()
185
+ return RegisteredUser(username="", access_level="admin", custom_fields=custom_fields)
133
186
 
134
187
  @ft.cached_property
135
188
  def _param_args(self) -> ps.ParametersArgs:
136
- return ps.ParameterConfigsSetIO.get_param_args(self._conn_args)
189
+ conn_args = self._conn_args
190
+ return ps.ParametersArgs(conn_args.project_path, conn_args.proj_vars, conn_args.env_vars)
137
191
 
138
192
  @ft.cached_property
139
193
  def _param_cfg_set(self) -> ps.ParameterConfigsSet:
140
194
  return ps.ParameterConfigsSetIO.load_from_file(
141
- self._logger, self._filepath, self._manifest_cfg, self._seeds, self._conn_set, self._param_args
195
+ self._logger, self._filepath, self._manifest_cfg, self._seeds, self._conn_set, self._param_args, self._datalake_db_path
142
196
  )
143
197
 
144
198
  @ft.cached_property
145
199
  def _j2_env(self) -> u.EnvironmentWithMacros:
146
- return u.EnvironmentWithMacros(self._logger, loader=u.j2.FileSystemLoader(self._filepath))
200
+ env = u.EnvironmentWithMacros(self._logger, loader=u.j2.FileSystemLoader(self._filepath))
201
+
202
+ def value_to_str(value: t.Any, attribute: str | None = None) -> str:
203
+ if attribute is None:
204
+ return str(value)
205
+ else:
206
+ return str(getattr(value, attribute))
207
+
208
+ def join(value: list[t.Any], d: str = ", ", attribute: str | None = None) -> str:
209
+ return d.join(map(lambda x: value_to_str(x, attribute), value))
210
+
211
+ def quote(value: t.Any, q: str = "'", attribute: str | None = None) -> str:
212
+ return q + value_to_str(value, attribute) + q
213
+
214
+ def quote_and_join(value: list[t.Any], q: str = "'", d: str = ", ", attribute: str | None = None) -> str:
215
+ return d.join(map(lambda x: quote(x, q, attribute), value))
216
+
217
+ env.filters["join"] = join
218
+ env.filters["quote"] = quote
219
+ env.filters["quote_and_join"] = quote_and_join
220
+ return env
147
221
 
148
- @ft.cached_property
149
- def _duckdb_venv_path(self) -> str:
150
- duckdb_filepath_setting_val = self._env_vars.get(c.SQRL_DUCKDB_VENV_DB_FILE_PATH, f"{c.TARGET_FOLDER}/{c.DUCKDB_VENV_FILE}")
151
- return str(u.Path(self._filepath, duckdb_filepath_setting_val))
152
-
153
222
  def close(self) -> None:
154
223
  """
155
224
  Deliberately close any open resources within the Squirrels project, such as database connections (instead of relying on the garbage collector).
@@ -184,20 +253,20 @@ class SquirrelsProject:
184
253
  return models_dict
185
254
 
186
255
 
187
- async def build(self, *, full_refresh: bool = False, select: str | None = None, stage_file: bool = False) -> None:
256
+ async def build(self, *, full_refresh: bool = False, select: str | None = None) -> None:
188
257
  """
189
- Build the virtual data environment for the Squirrels project
258
+ Build the Virtual Data Lake (VDL) for the Squirrels project
190
259
 
191
260
  Arguments:
192
- full_refresh: Whether to drop all tables and rebuild the virtual data environment from scratch. Default is False.
193
- stage_file: Whether to stage the DuckDB file to overwrite the existing one later if the virtual data environment is in use. Default is False.
261
+ full_refresh: Whether to drop all tables and rebuild the VDL from scratch. Default is False.
262
+ select: The name of a specific model to build. If None, all models are built. Default is None.
194
263
  """
195
264
  models_dict: dict[str, m.StaticModel] = self._get_static_models()
196
- builder = ModelBuilder(self._duckdb_venv_path, self._conn_set, models_dict, self._conn_args, self._logger)
197
- await builder.build(full_refresh, select, stage_file)
265
+ builder = ModelBuilder(self._datalake_db_path, self._conn_set, models_dict, self._conn_args, self._logger)
266
+ await builder.build(full_refresh, select)
198
267
 
199
268
  def _get_models_dict(self, always_python_df: bool) -> dict[str, m.DataModel]:
200
- models_dict: dict[str, m.DataModel] = dict(self._get_static_models())
269
+ models_dict: dict[str, m.DataModel] = self._get_static_models()
201
270
 
202
271
  for name, val in self._dbview_model_files.items():
203
272
  self._add_model(models_dict, m.DbviewModel(
@@ -213,19 +282,18 @@ class SquirrelsProject:
213
282
 
214
283
  return models_dict
215
284
 
216
- def _generate_dag(self, dataset: str, *, target_model_name: str | None = None, always_python_df: bool = False) -> m.DAG:
217
- models_dict = self._get_models_dict(always_python_df)
285
+ def _generate_dag(self, dataset: str) -> m.DAG:
286
+ models_dict = self._get_models_dict(always_python_df=False)
218
287
 
219
288
  dataset_config = self._manifest_cfg.datasets[dataset]
220
- target_model_name = dataset_config.model if target_model_name is None else target_model_name
221
- target_model = models_dict[target_model_name]
289
+ target_model = models_dict[dataset_config.model]
222
290
  target_model.is_target = True
223
- dag = m.DAG(dataset_config, target_model, models_dict, self._duckdb_venv_path, self._logger)
291
+ dag = m.DAG(dataset_config, target_model, models_dict, self._datalake_db_path, self._logger)
224
292
 
225
293
  return dag
226
294
 
227
- def _generate_dag_with_fake_target(self, sql_query: str | None) -> m.DAG:
228
- models_dict = self._get_models_dict(always_python_df=False)
295
+ def _generate_dag_with_fake_target(self, sql_query: str | None, *, always_python_df: bool = False) -> m.DAG:
296
+ models_dict = self._get_models_dict(always_python_df=always_python_df)
229
297
 
230
298
  if sql_query is None:
231
299
  dependencies = set(models_dict.keys())
@@ -235,227 +303,260 @@ class SquirrelsProject:
235
303
  substitutions = {}
236
304
  for model_name in dependencies:
237
305
  model = models_dict[model_name]
238
- if isinstance(model, m.SourceModel) and not model.model_config.load_to_duckdb:
239
- raise InvalidInputError(203, f"Source model '{model_name}' cannot be queried with DuckDB")
240
- if isinstance(model, (m.SourceModel, m.BuildModel)):
241
- substitutions[model_name] = f"venv.{model_name}"
306
+ if isinstance(model, m.SourceModel) and not model.is_queryable:
307
+ raise InvalidInputError(400, "cannot_query_source_model", f"Source model '{model_name}' cannot be queried with DuckDB")
308
+ if isinstance(model, m.BuildModel):
309
+ substitutions[model_name] = f"vdl.{model_name}"
310
+ elif isinstance(model, m.SourceModel):
311
+ if model.model_config.load_to_vdl:
312
+ substitutions[model_name] = f"vdl.{model_name}"
313
+ else:
314
+ # DuckDB connection without load_to_vdl - reference via attached database
315
+ conn_name = model.model_config.get_connection()
316
+ table_name = model.model_config.get_table()
317
+ substitutions[model_name] = f"db_{conn_name}.{table_name}"
242
318
 
243
319
  sql_query = parsed.transform(
244
- lambda node: sqlglot.expressions.Table(this=substitutions[node.name])
320
+ lambda node: sqlglot.expressions.Table(this=substitutions[node.name], alias=node.alias)
245
321
  if isinstance(node, sqlglot.expressions.Table) and node.name in substitutions
246
322
  else node
247
323
  ).sql()
248
324
 
249
325
  model_config = mc.FederateModelConfig(depends_on=dependencies)
250
- query_file = mq.SqlQueryFile("", sql_query or "")
326
+ query_file = mq.SqlQueryFile("", sql_query or "SELECT 1")
251
327
  fake_target_model = m.FederateModel(
252
328
  "__fake_target", model_config, query_file, logger=self._logger, env_vars=self._env_vars, conn_set=self._conn_set, j2_env=self._j2_env
253
329
  )
254
330
  fake_target_model.is_target = True
255
- dag = m.DAG(None, fake_target_model, models_dict, self._duckdb_venv_path, self._logger)
331
+ dag = m.DAG(None, fake_target_model, models_dict, self._datalake_db_path, self._logger)
256
332
  return dag
257
333
 
258
- def _draw_dag(self, dag: m.DAG, output_folder: u.Path) -> None:
259
- color_map = {
260
- m.ModelType.SEED: "green", m.ModelType.DBVIEW: "red", m.ModelType.FEDERATE: "skyblue",
261
- m.ModelType.BUILD: "purple", m.ModelType.SOURCE: "orange"
262
- }
263
-
264
- G = dag.to_networkx_graph()
265
-
266
- fig, _ = plt.subplots()
267
- pos = nx.multipartite_layout(G, subset_key="layer")
268
- colors = [color_map[node[1]] for node in G.nodes(data="model_type")] # type: ignore
269
- nx.draw(G, pos=pos, node_shape='^', node_size=1000, node_color=colors, arrowsize=20)
270
-
271
- y_values = [val[1] for val in pos.values()]
272
- scale = max(y_values) - min(y_values) if len(y_values) > 0 else 0
273
- label_pos = {key: (val[0], val[1]-0.002-0.1*scale) for key, val in pos.items()}
274
- nx.draw_networkx_labels(G, pos=label_pos, font_size=8)
275
-
276
- fig.tight_layout()
277
- plt.margins(x=0.1, y=0.1)
278
- fig.savefig(u.Path(output_folder, "dag.png"))
279
- plt.close(fig)
280
-
281
- async def _get_compiled_dag(self, *, sql_query: str | None = None, selections: dict[str, t.Any] = {}, user: BaseUser | None = None) -> m.DAG:
282
- dag = self._generate_dag_with_fake_target(sql_query)
334
+ async def _get_compiled_dag(
335
+ self, user: AbstractUser, *, sql_query: str | None = None, selections: dict[str, t.Any] = {}, configurables: dict[str, str] = {},
336
+ always_python_df: bool = False
337
+ ) -> m.DAG:
338
+ dag = self._generate_dag_with_fake_target(sql_query, always_python_df=always_python_df)
283
339
 
284
- default_traits = self._manifest_cfg.get_default_traits()
285
- await dag.execute(self._param_args, self._param_cfg_set, self._context_func, user, selections, runquery=False, default_traits=default_traits)
340
+ configurables = {**self._manifest_cfg.get_default_configurables(), **configurables}
341
+ await dag.execute(
342
+ self._param_args, self._param_cfg_set, self._context_func, user, selections,
343
+ runquery=False, configurables=configurables
344
+ )
286
345
  return dag
287
346
 
288
- def _get_all_connections(self) -> list[arm.ConnectionItemModel]:
347
+ def _get_all_connections(self) -> list[rm.ConnectionItemModel]:
289
348
  connections = []
290
349
  for conn_name, conn_props in self._conn_set.get_connections_as_dict().items():
291
350
  if isinstance(conn_props, mf.ConnectionProperties):
292
351
  label = conn_props.label if conn_props.label is not None else conn_name
293
- connections.append(arm.ConnectionItemModel(name=conn_name, label=label))
352
+ connections.append(rm.ConnectionItemModel(name=conn_name, label=label))
294
353
  return connections
295
354
 
296
- def _get_all_data_models(self, compiled_dag: m.DAG) -> list[arm.DataModelItem]:
355
+ def _get_all_data_models(self, compiled_dag: m.DAG) -> list[rm.DataModelItem]:
297
356
  return compiled_dag.get_all_data_models()
298
357
 
299
- async def get_all_data_models(self) -> list[arm.DataModelItem]:
358
+ async def get_all_data_models(self) -> list[rm.DataModelItem]:
300
359
  """
301
360
  Get all data models in the project
302
361
 
303
362
  Returns:
304
363
  A list of DataModelItem objects
305
364
  """
306
- compiled_dag = await self._get_compiled_dag()
365
+ compiled_dag = await self._get_compiled_dag(self._admin_user)
307
366
  return self._get_all_data_models(compiled_dag)
308
367
 
309
- def _get_all_data_lineage(self, compiled_dag: m.DAG) -> list[arm.LineageRelation]:
368
+ def _get_all_data_lineage(self, compiled_dag: m.DAG) -> list[rm.LineageRelation]:
310
369
  all_lineage = compiled_dag.get_all_model_lineage()
311
370
 
312
371
  # Add dataset nodes to the lineage
313
372
  for dataset in self._manifest_cfg.datasets.values():
314
- target_dataset = arm.LineageNode(name=dataset.name, type="dataset")
315
- source_model = arm.LineageNode(name=dataset.model, type="model")
316
- all_lineage.append(arm.LineageRelation(type="runtime", source=source_model, target=target_dataset))
373
+ target_dataset = rm.LineageNode(name=dataset.name, type="dataset")
374
+ source_model = rm.LineageNode(name=dataset.model, type="model")
375
+ all_lineage.append(rm.LineageRelation(type="runtime", source=source_model, target=target_dataset))
317
376
 
318
377
  # Add dashboard nodes to the lineage
319
378
  for dashboard in self._dashboards.values():
320
- target_dashboard = arm.LineageNode(name=dashboard.dashboard_name, type="dashboard")
379
+ target_dashboard = rm.LineageNode(name=dashboard.dashboard_name, type="dashboard")
321
380
  datasets = set(x.dataset for x in dashboard.config.depends_on)
322
381
  for dataset in datasets:
323
- source_dataset = arm.LineageNode(name=dataset, type="dataset")
324
- all_lineage.append(arm.LineageRelation(type="runtime", source=source_dataset, target=target_dashboard))
382
+ source_dataset = rm.LineageNode(name=dataset, type="dataset")
383
+ all_lineage.append(rm.LineageRelation(type="runtime", source=source_dataset, target=target_dashboard))
325
384
 
326
385
  return all_lineage
327
386
 
328
- async def get_all_data_lineage(self) -> list[arm.LineageRelation]:
387
+ async def get_all_data_lineage(self) -> list[rm.LineageRelation]:
329
388
  """
330
389
  Get all data lineage in the project
331
390
 
332
391
  Returns:
333
392
  A list of LineageRelation objects
334
393
  """
335
- compiled_dag = await self._get_compiled_dag()
394
+ compiled_dag = await self._get_compiled_dag(self._admin_user)
336
395
  return self._get_all_data_lineage(compiled_dag)
337
396
 
338
- async def _write_dataset_outputs_given_test_set(
339
- self, dataset: str, select: str, test_set: str | None, runquery: bool, recurse: bool
340
- ) -> t.Any | None:
341
- dataset_conf = self._manifest_cfg.datasets[dataset]
342
- default_test_set_conf = self._manifest_cfg.get_default_test_set(dataset)
343
- if test_set in self._manifest_cfg.selection_test_sets:
344
- test_set_conf = self._manifest_cfg.selection_test_sets[test_set]
345
- elif test_set is None or test_set == default_test_set_conf.name:
346
- test_set, test_set_conf = default_test_set_conf.name, default_test_set_conf
347
- else:
348
- raise ConfigurationError(f"No test set named '{test_set}' was found when compiling dataset '{dataset}'. The test set must be defined if not default for dataset.")
349
-
350
- error_msg_intro = f"Cannot compile dataset '{dataset}' with test set '{test_set}'."
351
- if test_set_conf.datasets is not None and dataset not in test_set_conf.datasets:
352
- raise ConfigurationError(f"{error_msg_intro}\n Applicable datasets for test set '{test_set}' does not include dataset '{dataset}'.")
353
-
354
- user_attributes = test_set_conf.user_attributes.copy() if test_set_conf.user_attributes is not None else {}
355
- selections = test_set_conf.parameters.copy()
356
- username, is_admin = user_attributes.pop("username", ""), user_attributes.pop("is_admin", False)
357
- if test_set_conf.is_authenticated:
358
- user = self._auth.User(username=username, is_admin=is_admin, **user_attributes)
359
- elif dataset_conf.scope == mf.PermissionScope.PUBLIC:
360
- user = None
361
- else:
362
- raise ConfigurationError(f"{error_msg_intro}\n Non-public datasets require a test set with 'user_attributes' section defined")
363
-
364
- if dataset_conf.scope == mf.PermissionScope.PRIVATE and not is_admin:
365
- raise ConfigurationError(f"{error_msg_intro}\n Private datasets require a test set with user_attribute 'is_admin' set to true")
366
-
367
- # always_python_df is set to True for creating CSV files from results (when runquery is True)
368
- dag = self._generate_dag(dataset, target_model_name=select, always_python_df=runquery)
369
- await dag.execute(
370
- self._param_args, self._param_cfg_set, self._context_func, user, selections,
371
- runquery=runquery, recurse=recurse, default_traits=self._manifest_cfg.get_default_traits()
372
- )
373
-
374
- output_folder = u.Path(self._filepath, c.TARGET_FOLDER, c.COMPILE_FOLDER, dataset, test_set)
375
- if output_folder.exists():
376
- shutil.rmtree(output_folder)
377
- output_folder.mkdir(parents=True, exist_ok=True)
378
-
379
- def write_placeholders() -> None:
380
- output_filepath = u.Path(output_folder, "placeholders.json")
381
- with open(output_filepath, 'w') as f:
382
- json.dump(dag.placeholders, f, indent=4)
383
-
384
- def write_model_outputs(model: m.DataModel) -> None:
385
- assert isinstance(model, m.QueryModel)
386
- subfolder = c.DBVIEWS_FOLDER if model.model_type == m.ModelType.DBVIEW else c.FEDERATES_FOLDER
387
- subpath = u.Path(output_folder, subfolder)
388
- subpath.mkdir(parents=True, exist_ok=True)
389
- if isinstance(model.compiled_query, mq.SqlModelQuery):
390
- output_filepath = u.Path(subpath, model.name+'.sql')
391
- query = model.compiled_query.query
392
- with open(output_filepath, 'w') as f:
393
- f.write(query)
394
- if runquery and isinstance(model.result, pl.LazyFrame):
395
- output_filepath = u.Path(subpath, model.name+'.csv')
396
- model.result.collect().write_csv(output_filepath)
397
-
398
- write_placeholders()
399
- all_model_names = dag.get_all_query_models()
400
- coroutines = [asyncio.to_thread(write_model_outputs, dag.models_dict[name]) for name in all_model_names]
401
- await u.asyncio_gather(coroutines)
402
-
403
- if recurse:
404
- self._draw_dag(dag, output_folder)
405
-
406
- if isinstance(dag.target_model, m.QueryModel) and dag.target_model.compiled_query is not None:
407
- return dag.target_model.compiled_query.query
408
-
409
397
  async def compile(
410
- self, *, dataset: str | None = None, do_all_datasets: bool = False, selected_model: str | None = None, test_set: str | None = None,
411
- do_all_test_sets: bool = False, runquery: bool = False
398
+ self, *, selected_model: str | None = None, test_set: str | None = None, do_all_test_sets: bool = False,
399
+ runquery: bool = False, clear: bool = False, buildtime_only: bool = False, runtime_only: bool = False
412
400
  ) -> None:
413
401
  """
414
- Async method to compile the SQL templates into files in the "target/" folder. Same functionality as the "sqrl compile" CLI.
402
+ Compile models into the "target/compile" folder.
415
403
 
416
- Although all arguments are "optional", the "dataset" argument is required if "do_all_datasets" argument is False.
404
+ Behavior:
405
+ - Buildtime outputs: target/compile/buildtime/*.sql (for SQL build models) and dag.png
406
+ - Runtime outputs: target/compile/runtime/[test_set]/dbviews/*.sql, federates/*.sql, dag.png
407
+ If runquery=True, also write CSVs for runtime models.
408
+ - Options: clear entire compile folder first; compile only buildtime or only runtime.
417
409
 
418
410
  Arguments:
419
- dataset: The name of the dataset to compile. Ignored if "do_all_datasets" argument is True, but required (i.e., cannot be None) if "do_all_datasets" is False. Default is None.
420
- do_all_datasets: If True, compile all datasets and ignore the "dataset" argument. Default is False.
421
411
  selected_model: The name of the model to compile. If specified, the compiled SQL query is also printed in the terminal. If None, all models for the selected dataset are compiled. Default is None.
422
412
  test_set: The name of the test set to compile with. If None, the default test set is used (which can vary by dataset). Ignored if `do_all_test_sets` argument is True. Default is None.
423
413
  do_all_test_sets: Whether to compile all applicable test sets for the selected dataset(s). If True, the `test_set` argument is ignored. Default is False.
424
- runquery**: Whether to run all compiled queries and save each result as a CSV file. If True and `selected_model` is specified, all upstream models of the selected model is compiled as well. Default is False.
414
+ runquery: Whether to run all compiled queries and save each result as a CSV file. If True and `selected_model` is specified, all upstream models of the selected model is compiled as well. Default is False.
415
+ clear: Whether to clear the "target/compile/" folder before compiling. Default is False.
416
+ buildtime_only: Whether to compile only buildtime models. Default is False.
417
+ runtime_only: Whether to compile only runtime models. Default is False.
425
418
  """
426
- recurse = True
427
- if do_all_datasets:
428
- selected_models = [(dataset.name, dataset.model) for dataset in self._manifest_cfg.datasets.values()]
429
- else:
430
- assert isinstance(dataset, str), "argument 'dataset' must be provided a string value if argument 'do_all_datasets' is False"
431
- assert dataset in self._manifest_cfg.datasets, f"dataset '{dataset}' not found in {c.MANIFEST_FILE}"
432
- if selected_model is None:
433
- selected_model = self._manifest_cfg.datasets[dataset].model
434
- else:
435
- recurse = False
436
- selected_models = [(dataset, selected_model)]
419
+ border = "=" * 80
420
+ underlines = "-" * len(border)
421
+
422
+ compile_root = Path(self._filepath, c.TARGET_FOLDER, c.COMPILE_FOLDER)
423
+ if clear and compile_root.exists():
424
+ shutil.rmtree(compile_root)
425
+
426
+ models_dict = self._get_models_dict(always_python_df=False)
427
+
428
+ if selected_model is not None:
429
+ selected_model = u.normalize_name(selected_model)
430
+ if selected_model not in models_dict:
431
+ print(f"No such model found: {selected_model}")
432
+ return
433
+ if not isinstance(models_dict[selected_model], m.QueryModel):
434
+ print(f"Model '{selected_model}' is not a query model. Nothing to do.")
435
+ return
437
436
 
438
- coroutines: list[t.Coroutine] = []
439
- for dataset, selected_model in selected_models:
440
- if do_all_test_sets:
441
- for test_set_name in self._manifest_cfg.get_applicable_test_sets(dataset):
442
- coroutine = self._write_dataset_outputs_given_test_set(dataset, selected_model, test_set_name, runquery, recurse)
443
- coroutines.append(coroutine)
437
+ model_to_compile = None
438
+
439
+ # Buildtime compilation
440
+ if not runtime_only:
441
+ print(underlines)
442
+ print(f"Compiling buildtime models")
443
+ print(underlines)
444
+
445
+ buildtime_folder = Path(compile_root, c.COMPILE_BUILDTIME_FOLDER)
446
+ buildtime_folder.mkdir(parents=True, exist_ok=True)
447
+
448
+ def write_buildtime_model(model: m.DataModel, static_models: dict[str, m.StaticModel]) -> None:
449
+ if not isinstance(model, m.BuildModel):
450
+ return
451
+
452
+ model.compile_for_build(self._conn_args, static_models)
453
+
454
+ if isinstance(model.compiled_query, mq.SqlModelQuery):
455
+ out_path = Path(buildtime_folder, f"{model.name}.sql")
456
+ with open(out_path, 'w') as f:
457
+ f.write(model.compiled_query.query)
458
+ print(f"Successfully compiled build model: {model.name}")
459
+ elif isinstance(model.compiled_query, mq.PyModelQuery):
460
+ print(f"The build model '{model.name}' is in Python. Compilation for Python is not supported yet.")
461
+
462
+ static_models = self._get_static_models()
463
+ if selected_model is not None:
464
+ model_to_compile = models_dict[selected_model]
465
+ write_buildtime_model(model_to_compile, static_models)
466
+ else:
467
+ coros = [asyncio.to_thread(write_buildtime_model, m, static_models) for m in static_models.values()]
468
+ await u.asyncio_gather(coros)
444
469
 
445
- coroutine = self._write_dataset_outputs_given_test_set(dataset, selected_model, test_set, runquery, recurse)
446
- coroutines.append(coroutine)
447
-
448
- queries = await u.asyncio_gather(coroutines)
470
+ print(underlines)
471
+ print()
449
472
 
450
- print(f"Compiled successfully! See the '{c.TARGET_FOLDER}/' folder for results.")
451
- print()
452
- if not recurse and len(queries) == 1 and isinstance(queries[0], str):
453
- print(queries[0])
473
+ # Runtime compilation
474
+ if not buildtime_only:
475
+ if do_all_test_sets:
476
+ test_set_names_set = set(self._manifest_cfg.selection_test_sets.keys())
477
+ test_set_names_set.add(c.DEFAULT_TEST_SET_NAME)
478
+ test_set_names = list(test_set_names_set)
479
+ else:
480
+ test_set_names = [test_set or c.DEFAULT_TEST_SET_NAME]
481
+
482
+ for ts_name in test_set_names:
483
+ print(underlines)
484
+ print(f"Compiling runtime models (test set '{ts_name}')")
485
+ print(underlines)
486
+
487
+ # Build user and selections from test set config if present
488
+ ts_conf = self._manifest_cfg.selection_test_sets.get(ts_name, self._manifest_cfg.get_default_test_set())
489
+ # Separate base fields from custom fields
490
+ access_level = ts_conf.user.access_level
491
+ custom_fields = self._auth.CustomUserFields(**ts_conf.user.custom_fields)
492
+ if access_level == "guest":
493
+ user = GuestUser(username="", custom_fields=custom_fields)
494
+ else:
495
+ user = RegisteredUser(username="", access_level=access_level, custom_fields=custom_fields)
496
+
497
+ # Generate DAG across all models. When runquery=True, force models to produce Python dataframes so CSVs can be written.
498
+ dag = await self._get_compiled_dag(
499
+ user=user, selections=ts_conf.parameters, configurables=ts_conf.configurables, always_python_df=runquery,
500
+ )
501
+ if runquery:
502
+ await dag._run_models()
503
+
504
+ # Prepare output folders
505
+ runtime_folder = Path(compile_root, c.COMPILE_RUNTIME_FOLDER, ts_name)
506
+ dbviews_folder = Path(runtime_folder, c.DBVIEWS_FOLDER)
507
+ federates_folder = Path(runtime_folder, c.FEDERATES_FOLDER)
508
+ dbviews_folder.mkdir(parents=True, exist_ok=True)
509
+ federates_folder.mkdir(parents=True, exist_ok=True)
510
+ with open(Path(runtime_folder, "placeholders.json"), "w") as f:
511
+ json.dump(dag.placeholders, f)
512
+
513
+ # Function to write runtime models
514
+ def write_runtime_model(model: m.DataModel) -> None:
515
+ if not isinstance(model, m.QueryModel):
516
+ return
517
+
518
+ if model.model_type not in (m.ModelType.DBVIEW, m.ModelType.FEDERATE):
519
+ return
520
+
521
+ subfolder = dbviews_folder if model.model_type == m.ModelType.DBVIEW else federates_folder
522
+ model_type = "dbview" if model.model_type == m.ModelType.DBVIEW else "federate"
523
+
524
+ if isinstance(model.compiled_query, mq.SqlModelQuery):
525
+ out_sql = Path(subfolder, f"{model.name}.sql")
526
+ with open(out_sql, 'w') as f:
527
+ f.write(model.compiled_query.query)
528
+ print(f"Successfully compiled {model_type} model: {model.name}")
529
+ elif isinstance(model.compiled_query, mq.PyModelQuery):
530
+ print(f"The {model_type} model '{model.name}' is in Python. Compilation for Python is not supported yet.")
531
+
532
+ if runquery and isinstance(model.result, pl.LazyFrame):
533
+ out_csv = Path(subfolder, f"{model.name}.csv")
534
+ model.result.collect().write_csv(out_csv)
535
+ print(f"Successfully created CSV for {model_type} model: {model.name}")
536
+
537
+ # If selected_model is provided for runtime, only emit that model's outputs
538
+ if selected_model is not None:
539
+ model_to_compile = dag.models_dict[selected_model]
540
+ write_runtime_model(model_to_compile)
541
+ else:
542
+ coros = [asyncio.to_thread(write_runtime_model, model) for model in dag.models_dict.values()]
543
+ await u.asyncio_gather(coros)
544
+
545
+ print(underlines)
546
+ print()
547
+
548
+ print(f"All compilations complete! See the '{c.TARGET_FOLDER}/{c.COMPILE_FOLDER}/' folder for results.")
549
+ if model_to_compile and isinstance(model_to_compile, m.QueryModel) and isinstance(model_to_compile.compiled_query, mq.SqlModelQuery):
550
+ print()
551
+ print(border)
552
+ print(f"Compiled SQL query for model '{model_to_compile.name}':")
553
+ print(underlines)
554
+ print(model_to_compile.compiled_query.query)
555
+ print(border)
454
556
  print()
455
557
 
456
- def _permission_error(self, user: BaseUser | None, data_type: str, data_name: str, scope: str) -> InvalidInputError:
457
- username = "" if user is None else f" '{user.username}'"
458
- return InvalidInputError(25, f"User{username} does not have permission to access {scope} {data_type}: {data_name}")
558
+ def _permission_error(self, user: AbstractUser, data_type: str, data_name: str, scope: str) -> InvalidInputError:
559
+ return InvalidInputError(403, f"unauthorized_access_to_{data_type}", f"User '{user}' does not have permission to access {scope} {data_type}: {data_name}")
459
560
 
460
561
  def seed(self, name: str) -> pl.LazyFrame:
461
562
  """
@@ -491,7 +592,8 @@ class SquirrelsProject:
491
592
  )
492
593
 
493
594
  async def dataset(
494
- self, name: str, *, selections: dict[str, t.Any] = {}, user: BaseUser | None = None, require_auth: bool = True
595
+ self, name: str, *, selections: dict[str, t.Any] = {}, user: AbstractUser | None = None, require_auth: bool = True,
596
+ configurables: dict[str, str] = {}
495
597
  ) -> dr.DatasetResult:
496
598
  """
497
599
  Async method to retrieve a dataset as a DatasetResult object (with metadata) given parameter selections.
@@ -504,14 +606,17 @@ class SquirrelsProject:
504
606
  Returns:
505
607
  A DatasetResult object containing the dataset result (as a polars DataFrame), its description, and the column details.
506
608
  """
609
+ if user is None:
610
+ user = self._guest_user
611
+
507
612
  scope = self._manifest_cfg.datasets[name].scope
508
613
  if require_auth and not self._auth.can_user_access_scope(user, scope):
509
614
  raise self._permission_error(user, "dataset", name, scope.name)
510
615
 
511
616
  dag = self._generate_dag(name)
617
+ configurables = {**self._manifest_cfg.get_default_configurables(name), **configurables}
512
618
  await dag.execute(
513
- self._param_args, self._param_cfg_set, self._context_func, user, dict(selections),
514
- default_traits=self._manifest_cfg.get_default_traits()
619
+ self._param_args, self._param_cfg_set, self._context_func, user, dict(selections), configurables=configurables
515
620
  )
516
621
  assert isinstance(dag.target_model.result, pl.LazyFrame)
517
622
  return dr.DatasetResult(
@@ -520,7 +625,8 @@ class SquirrelsProject:
520
625
  )
521
626
 
522
627
  async def dashboard(
523
- self, name: str, *, selections: dict[str, t.Any] = {}, user: BaseUser | None = None, dashboard_type: t.Type[T] = dash.Dashboard
628
+ self, name: str, *, selections: dict[str, t.Any] = {}, user: AbstractUser | None = None, dashboard_type: t.Type[T] = d.PngDashboard,
629
+ configurables: dict[str, str] = {}
524
630
  ) -> T:
525
631
  """
526
632
  Async method to retrieve a dashboard given parameter selections.
@@ -534,13 +640,18 @@ class SquirrelsProject:
534
640
  Returns:
535
641
  The dashboard type specified by the "dashboard_type" argument.
536
642
  """
643
+ if user is None:
644
+ user = self._guest_user
645
+
537
646
  scope = self._dashboards[name].config.scope
538
647
  if not self._auth.can_user_access_scope(user, scope):
539
648
  raise self._permission_error(user, "dashboard", name, scope.name)
540
649
 
541
650
  async def get_dataset_df(dataset_name: str, fixed_params: dict[str, t.Any]) -> pl.DataFrame:
542
651
  final_selections = {**selections, **fixed_params}
543
- result = await self.dataset(dataset_name, selections=final_selections, user=user, require_auth=False)
652
+ result = await self.dataset(
653
+ dataset_name, selections=final_selections, user=user, require_auth=False, configurables=configurables
654
+ )
544
655
  return result.df
545
656
 
546
657
  args = d.DashboardArgs(self._param_args, get_dataset_df)
@@ -550,12 +661,62 @@ class SquirrelsProject:
550
661
  raise KeyError(f"No dashboard file found for: {name}")
551
662
 
552
663
  async def query_models(
553
- self, sql_query: str, *, selections: dict[str, t.Any] = {}, user: BaseUser | None = None
664
+ self, sql_query: str, *, user: AbstractUser | None = None, selections: dict[str, t.Any] = {}, configurables: dict[str, str] = {}
554
665
  ) -> dr.DatasetResult:
555
- dag = await self._get_compiled_dag(sql_query=sql_query, selections=selections, user=user)
666
+ if user is None:
667
+ user = self._guest_user
668
+
669
+ dag = await self._get_compiled_dag(user=user, sql_query=sql_query, selections=selections, configurables=configurables)
556
670
  await dag._run_models()
557
671
  assert isinstance(dag.target_model.result, pl.LazyFrame)
558
672
  return dr.DatasetResult(
559
673
  target_model_config=dag.target_model.model_config,
560
674
  df=dag.target_model.result.collect().with_row_index("_row_num", offset=1)
561
675
  )
676
+
677
+ async def get_compiled_model_query(
678
+ self, model_name: str, *, user: AbstractUser | None = None, selections: dict[str, t.Any] = {}, configurables: dict[str, str] = {}
679
+ ) -> rm.CompiledQueryModel:
680
+ """
681
+ Compile the specified data model and return its language and compiled definition.
682
+ """
683
+ if user is None:
684
+ user = self._guest_user
685
+
686
+ name = u.normalize_name(model_name)
687
+ models_dict = self._get_models_dict(always_python_df=False)
688
+ if name not in models_dict:
689
+ raise InvalidInputError(404, "model_not_found", f"No data model found with name: {model_name}")
690
+
691
+ model = models_dict[name]
692
+ # Only build, dbview, and federate models support runtime compiled definition in this context
693
+ if not isinstance(model, (m.BuildModel, m.DbviewModel, m.FederateModel)):
694
+ raise InvalidInputError(400, "unsupported_model_type", "Only build, dbview, and federate models currently support compiled definition via this endpoint")
695
+
696
+ # Build a DAG with this model as the target, without a dataset context
697
+ model.is_target = True
698
+ dag = m.DAG(None, model, models_dict, self._datalake_db_path, self._logger)
699
+
700
+ cfg = {**self._manifest_cfg.get_default_configurables(), **configurables}
701
+ await dag.execute(
702
+ self._param_args, self._param_cfg_set, self._context_func, user, selections, runquery=False, configurables=cfg
703
+ )
704
+
705
+ language = "sql" if isinstance(model.query_file, mq.SqlQueryFile) else "python"
706
+ if isinstance(model, m.BuildModel):
707
+ # Compile SQL build models; Python build models not yet supported
708
+ if isinstance(model.query_file, mq.SqlQueryFile):
709
+ static_models = self._get_static_models()
710
+ compiled = model._compile_sql_model(model.query_file, self._conn_args, static_models)
711
+ definition = compiled.query
712
+ else:
713
+ definition = "# Compiling Python build models is currently not supported. This will be available in a future version of Squirrels..."
714
+ elif isinstance(model.compiled_query, mq.SqlModelQuery):
715
+ definition = model.compiled_query.query
716
+ elif isinstance(model.compiled_query, mq.PyModelQuery):
717
+ definition = "# Compiling Python data models is currently not supported. This will be available in a future version of Squirrels..."
718
+ else:
719
+ raise NotImplementedError(f"Query type not supported: {model.compiled_query.__class__.__name__}")
720
+
721
+ return rm.CompiledQueryModel(language=language, definition=definition, placeholders=dag.placeholders)
722
+