squirrels-0.4.1-py3-none-any.whl → squirrels-0.5.0-py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of squirrels might be problematic. (The original page linked to further details here; the link target was not preserved in this text.)

Files changed (125)
  1. dateutils/__init__.py +6 -0
  2. dateutils/_enums.py +25 -0
  3. squirrels/dateutils.py → dateutils/_implementation.py +58 -111
  4. dateutils/types.py +6 -0
  5. squirrels/__init__.py +13 -11
  6. squirrels/_api_routes/__init__.py +5 -0
  7. squirrels/_api_routes/auth.py +271 -0
  8. squirrels/_api_routes/base.py +165 -0
  9. squirrels/_api_routes/dashboards.py +150 -0
  10. squirrels/_api_routes/data_management.py +145 -0
  11. squirrels/_api_routes/datasets.py +257 -0
  12. squirrels/_api_routes/oauth2.py +298 -0
  13. squirrels/_api_routes/project.py +252 -0
  14. squirrels/_api_server.py +256 -450
  15. squirrels/_arguments/__init__.py +0 -0
  16. squirrels/_arguments/init_time_args.py +108 -0
  17. squirrels/_arguments/run_time_args.py +147 -0
  18. squirrels/_auth.py +960 -0
  19. squirrels/_command_line.py +126 -45
  20. squirrels/_compile_prompts.py +147 -0
  21. squirrels/_connection_set.py +48 -26
  22. squirrels/_constants.py +68 -38
  23. squirrels/_dashboards.py +160 -0
  24. squirrels/_data_sources.py +570 -0
  25. squirrels/_dataset_types.py +84 -0
  26. squirrels/_exceptions.py +29 -0
  27. squirrels/_initializer.py +177 -80
  28. squirrels/_logging.py +115 -0
  29. squirrels/_manifest.py +208 -79
  30. squirrels/_model_builder.py +69 -0
  31. squirrels/_model_configs.py +74 -0
  32. squirrels/_model_queries.py +52 -0
  33. squirrels/_models.py +926 -367
  34. squirrels/_package_data/base_project/.env +42 -0
  35. squirrels/_package_data/base_project/.env.example +42 -0
  36. squirrels/_package_data/base_project/assets/expenses.db +0 -0
  37. squirrels/_package_data/base_project/connections.yml +16 -0
  38. squirrels/_package_data/base_project/dashboards/dashboard_example.py +34 -0
  39. squirrels/_package_data/base_project/dashboards/dashboard_example.yml +22 -0
  40. squirrels/{package_data → _package_data}/base_project/docker/.dockerignore +5 -2
  41. squirrels/{package_data → _package_data}/base_project/docker/Dockerfile +3 -3
  42. squirrels/{package_data → _package_data}/base_project/docker/compose.yml +1 -1
  43. squirrels/_package_data/base_project/duckdb_init.sql +10 -0
  44. squirrels/{package_data/base_project/.gitignore → _package_data/base_project/gitignore} +3 -2
  45. squirrels/_package_data/base_project/macros/macros_example.sql +17 -0
  46. squirrels/_package_data/base_project/models/builds/build_example.py +26 -0
  47. squirrels/_package_data/base_project/models/builds/build_example.sql +16 -0
  48. squirrels/_package_data/base_project/models/builds/build_example.yml +57 -0
  49. squirrels/_package_data/base_project/models/dbviews/dbview_example.sql +12 -0
  50. squirrels/_package_data/base_project/models/dbviews/dbview_example.yml +26 -0
  51. squirrels/_package_data/base_project/models/federates/federate_example.py +37 -0
  52. squirrels/_package_data/base_project/models/federates/federate_example.sql +19 -0
  53. squirrels/_package_data/base_project/models/federates/federate_example.yml +65 -0
  54. squirrels/_package_data/base_project/models/sources.yml +38 -0
  55. squirrels/{package_data → _package_data}/base_project/parameters.yml +56 -40
  56. squirrels/_package_data/base_project/pyconfigs/connections.py +14 -0
  57. squirrels/{package_data → _package_data}/base_project/pyconfigs/context.py +21 -40
  58. squirrels/_package_data/base_project/pyconfigs/parameters.py +141 -0
  59. squirrels/_package_data/base_project/pyconfigs/user.py +44 -0
  60. squirrels/_package_data/base_project/seeds/seed_categories.yml +15 -0
  61. squirrels/_package_data/base_project/seeds/seed_subcategories.csv +15 -0
  62. squirrels/_package_data/base_project/seeds/seed_subcategories.yml +21 -0
  63. squirrels/_package_data/base_project/squirrels.yml.j2 +61 -0
  64. squirrels/_package_data/templates/dataset_results.html +112 -0
  65. squirrels/_package_data/templates/oauth_login.html +271 -0
  66. squirrels/_package_data/templates/squirrels_studio.html +20 -0
  67. squirrels/_package_loader.py +8 -4
  68. squirrels/_parameter_configs.py +104 -103
  69. squirrels/_parameter_options.py +348 -0
  70. squirrels/_parameter_sets.py +57 -47
  71. squirrels/_parameters.py +1664 -0
  72. squirrels/_project.py +721 -0
  73. squirrels/_py_module.py +7 -5
  74. squirrels/_schemas/__init__.py +0 -0
  75. squirrels/_schemas/auth_models.py +167 -0
  76. squirrels/_schemas/query_param_models.py +75 -0
  77. squirrels/{_api_response_models.py → _schemas/response_models.py} +126 -47
  78. squirrels/_seeds.py +35 -16
  79. squirrels/_sources.py +110 -0
  80. squirrels/_utils.py +248 -73
  81. squirrels/_version.py +1 -1
  82. squirrels/arguments.py +7 -0
  83. squirrels/auth.py +4 -0
  84. squirrels/connections.py +3 -0
  85. squirrels/dashboards.py +2 -81
  86. squirrels/data_sources.py +14 -631
  87. squirrels/parameter_options.py +13 -348
  88. squirrels/parameters.py +14 -1266
  89. squirrels/types.py +16 -0
  90. squirrels-0.5.0.dist-info/METADATA +113 -0
  91. squirrels-0.5.0.dist-info/RECORD +97 -0
  92. {squirrels-0.4.1.dist-info → squirrels-0.5.0.dist-info}/WHEEL +1 -1
  93. squirrels-0.5.0.dist-info/entry_points.txt +3 -0
  94. {squirrels-0.4.1.dist-info → squirrels-0.5.0.dist-info/licenses}/LICENSE +1 -1
  95. squirrels/_authenticator.py +0 -85
  96. squirrels/_dashboards_io.py +0 -61
  97. squirrels/_environcfg.py +0 -84
  98. squirrels/arguments/init_time_args.py +0 -40
  99. squirrels/arguments/run_time_args.py +0 -208
  100. squirrels/package_data/assets/favicon.ico +0 -0
  101. squirrels/package_data/assets/index.css +0 -1
  102. squirrels/package_data/assets/index.js +0 -58
  103. squirrels/package_data/base_project/assets/expenses.db +0 -0
  104. squirrels/package_data/base_project/connections.yml +0 -7
  105. squirrels/package_data/base_project/dashboards/dashboard_example.py +0 -32
  106. squirrels/package_data/base_project/dashboards.yml +0 -10
  107. squirrels/package_data/base_project/env.yml +0 -29
  108. squirrels/package_data/base_project/models/dbviews/dbview_example.py +0 -47
  109. squirrels/package_data/base_project/models/dbviews/dbview_example.sql +0 -22
  110. squirrels/package_data/base_project/models/federates/federate_example.py +0 -21
  111. squirrels/package_data/base_project/models/federates/federate_example.sql +0 -3
  112. squirrels/package_data/base_project/pyconfigs/auth.py +0 -45
  113. squirrels/package_data/base_project/pyconfigs/connections.py +0 -19
  114. squirrels/package_data/base_project/pyconfigs/parameters.py +0 -95
  115. squirrels/package_data/base_project/seeds/seed_subcategories.csv +0 -15
  116. squirrels/package_data/base_project/squirrels.yml.j2 +0 -94
  117. squirrels/package_data/templates/index.html +0 -18
  118. squirrels/project.py +0 -378
  119. squirrels/user_base.py +0 -55
  120. squirrels-0.4.1.dist-info/METADATA +0 -117
  121. squirrels-0.4.1.dist-info/RECORD +0 -60
  122. squirrels-0.4.1.dist-info/entry_points.txt +0 -4
  123. /squirrels/{package_data → _package_data}/base_project/assets/weather.db +0 -0
  124. /squirrels/{package_data → _package_data}/base_project/seeds/seed_categories.csv +0 -0
  125. /squirrels/{package_data → _package_data}/base_project/tmp/.gitignore +0 -0
squirrels/_manifest.py CHANGED
@@ -1,22 +1,32 @@
1
- from typing import Any
1
+ from functools import cached_property
2
+ from typing import Literal, Any
3
+ from urllib.parse import urlparse
4
+ from sqlalchemy import Engine, create_engine
2
5
  from typing_extensions import Self
3
6
  from enum import Enum
4
7
  from pydantic import BaseModel, Field, field_validator, model_validator, ValidationInfo, ValidationError
5
- import yaml, time
8
+ import yaml, time, re
6
9
 
7
- from . import _constants as c, _utils as _u
8
- from ._environcfg import EnvironConfig
10
+ from . import _constants as c, _utils as u
9
11
 
10
12
 
11
13
  class ProjectVarsConfig(BaseModel, extra="allow"):
12
14
  name: str
13
15
  label: str = ""
16
+ description: str = ""
14
17
  major_version: int
15
18
 
19
+ @field_validator("name")
20
+ @classmethod
21
+ def validate_name(cls, v: str) -> str:
22
+ if not re.fullmatch(r"[A-Za-z0-9_-]+", v):
23
+ raise ValueError("Project name must only contain alphanumeric characters, underscores, and dashes.")
24
+ return v
25
+
16
26
  @model_validator(mode="after")
17
27
  def finalize_label(self) -> Self:
18
28
  if self.label == "":
19
- self.label = self.name
29
+ self.label = u.to_title_case(self.name)
20
30
  return self
21
31
 
22
32
 
@@ -36,41 +46,133 @@ class _ConfigWithNameBaseModel(BaseModel):
36
46
  name: str
37
47
 
38
48
 
39
- class DbConnConfig(_ConfigWithNameBaseModel):
40
- credential: str | None = None
41
- url: str
49
+ class ConnectionTypeEnum(Enum):
50
+ SQLALCHEMY = "sqlalchemy"
51
+ CONNECTORX = "connectorx"
52
+ ADBC = "adbc"
53
+ DUCKDB = "duckdb"
42
54
 
43
- def finalize_url(self, base_path: str, env_cfg: EnvironConfig) -> Self:
44
- username, password = env_cfg.get_credential(self.credential)
45
- self.url = self.url.format(username=username, password=password, project_path=base_path)
55
+
56
+ class ConnectionProperties(BaseModel):
57
+ """
58
+ A class for holding the properties of a connection
59
+
60
+ Arguments:
61
+ type: The type of connection, one of "sqlalchemy", "connectorx", or "adbc"
62
+ uri: The URI for the connection
63
+ """
64
+ label: str | None = None
65
+ type: ConnectionTypeEnum = Field(default=ConnectionTypeEnum.SQLALCHEMY)
66
+ uri: str
67
+ sa_create_engine_args: dict[str, Any] = Field(default_factory=dict)
68
+
69
+ @cached_property
70
+ def engine(self) -> Engine:
71
+ """
72
+ Creates and caches a SQLAlchemy engine if the connection type is sqlalchemy.
73
+ Returns None for other connection types.
74
+ """
75
+ if self.type == ConnectionTypeEnum.SQLALCHEMY:
76
+ return create_engine(self.uri, **self.sa_create_engine_args)
77
+ else:
78
+ raise ValueError(f'Connection type "{self.type}" does not support engine property')
79
+
80
+ @cached_property
81
+ def dialect(self) -> str:
82
+ default_dialect = None
83
+ if self.type == ConnectionTypeEnum.SQLALCHEMY:
84
+ dialect = self.engine.dialect.name
85
+ elif self.type == ConnectionTypeEnum.DUCKDB:
86
+ dialect = self.uri.split(':')[0]
87
+ default_dialect = 'duckdb'
88
+ else:
89
+ url = urlparse(self.uri)
90
+ dialect = url.scheme
91
+
92
+ processed_dialect = next((d for d in ['sqlite', 'postgres', 'mysql', 'duckdb'] if dialect.lower().startswith(d)), default_dialect)
93
+ dialect = processed_dialect if processed_dialect is not None else dialect
94
+ return dialect
95
+
96
+ @cached_property
97
+ def attach_uri_for_duckdb(self) -> str | None:
98
+ if self.type == ConnectionTypeEnum.DUCKDB:
99
+ return self.uri
100
+ elif self.type == ConnectionTypeEnum.SQLALCHEMY:
101
+ url = self.engine.url
102
+ host = url.host
103
+ port = url.port
104
+ username = url.username
105
+ password = url.password
106
+ database = url.database
107
+ database_as_file = database if database is not None else ""
108
+ else:
109
+ url = urlparse(self.uri)
110
+ host = url.hostname
111
+ port = url.port
112
+ username = url.username
113
+ password = url.password
114
+ database = url.path.lstrip('/')
115
+ database_as_file = self.uri.replace(f"{self.dialect}://", "")
116
+
117
+ if self.dialect in ('postgres', 'mysql'):
118
+ attach_uri = f"{self.dialect}:dbname={database} user={username} password={password} host={host} port={port}"
119
+ elif self.dialect == "sqlite":
120
+ attach_uri = f"{self.dialect}:{database_as_file}"
121
+ elif self.dialect == "duckdb":
122
+ attach_uri = database_as_file
123
+ else:
124
+ attach_uri = None
125
+
126
+ return attach_uri
127
+
128
+
129
+ class DbConnConfig(ConnectionProperties, _ConfigWithNameBaseModel):
130
+ def finalize_uri(self, base_path: str) -> Self:
131
+ self.uri = self.uri.format(project_path=base_path)
46
132
  return self
47
133
 
48
134
 
135
+ class DatasetConfigurablesConfig(BaseModel):
136
+ name: str
137
+ default: str
138
+
139
+
140
+ class ConfigurablesConfig(DatasetConfigurablesConfig):
141
+ label: str = ""
142
+ description: str = ""
143
+
144
+
49
145
  class ParametersConfig(BaseModel):
50
146
  type: str
51
147
  factory: str
52
148
  arguments: dict[str, Any]
53
149
 
54
150
 
55
- class DbviewConfig(_ConfigWithNameBaseModel):
56
- connection_name: str | None = None
151
+ class PermissionScope(Enum):
152
+ PUBLIC = 0
153
+ PROTECTED = 1
154
+ PRIVATE = 2
57
155
 
58
156
 
59
- class FederateConfig(_ConfigWithNameBaseModel):
60
- materialized: str | None = None
157
+ class AuthenticationEnforcement(Enum):
158
+ REQUIRED = "required"
159
+ OPTIONAL = "optional"
160
+ DISABLED = "disabled"
61
161
 
162
+ class AuthenticationType(Enum):
163
+ MANAGED = "managed"
164
+ EXTERNAL = "external"
62
165
 
63
- class DatasetScope(Enum):
64
- PUBLIC = 0
65
- PROTECTED = 1
66
- PRIVATE = 2
166
+ class AuthenticationConfig(BaseModel):
167
+ enforcement: AuthenticationEnforcement = AuthenticationEnforcement.OPTIONAL
168
+ type: AuthenticationType = AuthenticationType.MANAGED
67
169
 
68
170
 
69
171
  class AnalyticsOutputConfig(_ConfigWithNameBaseModel):
70
172
  label: str = ""
71
173
  description: str = ""
72
- scope: DatasetScope = DatasetScope.PUBLIC
73
- parameters: list[str] = Field(default_factory=list)
174
+ scope: PermissionScope = PermissionScope.PUBLIC
175
+ parameters: list[str] | None = Field(default=None, description="The list of parameter names used by the dataset/dashboard")
74
176
 
75
177
  @model_validator(mode="after")
76
178
  def finalize_label(self) -> Self:
@@ -80,19 +182,18 @@ class AnalyticsOutputConfig(_ConfigWithNameBaseModel):
80
182
 
81
183
  @field_validator("scope", mode="before")
82
184
  @classmethod
83
- def validate_scope(cls, value: str, info: ValidationInfo) -> DatasetScope:
185
+ def validate_scope(cls, value: str, info: ValidationInfo) -> PermissionScope:
84
186
  try:
85
- return DatasetScope[str(value).upper()]
187
+ return PermissionScope[str(value).upper()]
86
188
  except KeyError as e:
87
189
  name = info.data.get("name")
88
- scope_list = [scope.name.lower() for scope in DatasetScope]
190
+ scope_list = [scope.name.lower() for scope in PermissionScope]
89
191
  raise ValueError(f'Scope "{value}" is invalid for dataset/dashboard "{name}". Scope must be one of {scope_list}') from e
90
192
 
91
193
 
92
194
  class DatasetConfig(AnalyticsOutputConfig):
93
195
  model: str = ""
94
- traits: dict = Field(default_factory=dict)
95
- default_test_set: str = ""
196
+ configurables: list[DatasetConfigurablesConfig] = Field(default_factory=list)
96
197
 
97
198
  def __hash__(self) -> int:
98
199
  return hash("dataset_"+self.name)
@@ -104,47 +205,27 @@ class DatasetConfig(AnalyticsOutputConfig):
104
205
  return self
105
206
 
106
207
 
107
- class DashboardConfig(AnalyticsOutputConfig):
108
- def __hash__(self) -> int:
109
- return hash("dashboard_"+self.name)
110
-
208
+ class TestSetsUserConfig(BaseModel):
209
+ access_level: Literal["admin", "member", "guest"] = "guest"
210
+ custom_fields: dict[str, Any] = Field(default_factory=dict)
111
211
 
112
212
  class TestSetsConfig(_ConfigWithNameBaseModel):
113
- datasets: list[str] | None = None
114
- is_authenticated: bool = False
115
- user_attributes: dict[str, Any] = Field(default_factory=dict)
213
+ user: TestSetsUserConfig = Field(default_factory=TestSetsUserConfig)
116
214
  parameters: dict[str, Any] = Field(default_factory=dict)
117
-
118
- @model_validator(mode="after")
119
- def finalize_is_authenticated(self) -> Self:
120
- if len(self.user_attributes) > 0:
121
- self.is_authenticated = True
122
- return self
123
-
124
-
125
- class Settings(BaseModel):
126
- data: dict[str, Any]
127
-
128
- def get_default_connection_name(self) -> str:
129
- return self.data.get(c.DB_CONN_DEFAULT_USED_SETTING, c.DEFAULT_DB_CONN)
130
-
131
- def do_use_duckdb(self) -> bool:
132
- return self.data.get(c.IN_MEMORY_DB_SETTING, c.SQLITE) == c.DUCKDB
215
+ configurables: dict[str, Any] = Field(default_factory=dict)
133
216
 
134
217
 
135
218
  class ManifestConfig(BaseModel):
136
- env_cfg: EnvironConfig
137
219
  project_variables: ProjectVarsConfig
220
+ authentication: AuthenticationConfig = Field(default_factory=AuthenticationConfig)
138
221
  packages: list[PackageConfig] = Field(default_factory=list)
139
222
  connections: dict[str, DbConnConfig] = Field(default_factory=dict)
140
223
  parameters: list[ParametersConfig] = Field(default_factory=list)
224
+ configurables: dict[str, ConfigurablesConfig] = Field(default_factory=dict)
141
225
  selection_test_sets: dict[str, TestSetsConfig] = Field(default_factory=dict)
142
- dbviews: dict[str, DbviewConfig] = Field(default_factory=dict)
143
- federates: dict[str, FederateConfig] = Field(default_factory=dict)
144
226
  datasets: dict[str, DatasetConfig] = Field(default_factory=dict)
145
- dashboards: dict[str, DashboardConfig] = Field(default_factory=dict)
146
- settings: dict[str, Any] = Field(default_factory=dict)
147
227
  base_path: str = "."
228
+ env_vars: dict[str, str] = Field(default_factory=dict)
148
229
 
149
230
  @field_validator("packages")
150
231
  @classmethod
@@ -156,13 +237,13 @@ class ManifestConfig(BaseModel):
156
237
  set_of_directories.add(package.directory)
157
238
  return packages
158
239
 
159
- @field_validator("connections", "selection_test_sets", "dbviews", "federates", "datasets", "dashboards", mode="before")
240
+ @field_validator("connections", "selection_test_sets", "datasets", "configurables", mode="before")
160
241
  @classmethod
161
242
  def names_are_unique(cls, values: list[dict] | dict[str, dict], info: ValidationInfo) -> dict[str, dict]:
162
243
  if isinstance(values, list):
163
244
  values_as_dict = {}
164
245
  for obj in values:
165
- name = obj["name"]
246
+ name = u.normalize_name(obj["name"])
166
247
  if name in values_as_dict:
167
248
  raise ValueError(f'In the {info.field_name} section, the name "{name}" was specified multiple times')
168
249
  values_as_dict[name] = obj
@@ -173,45 +254,93 @@ class ManifestConfig(BaseModel):
173
254
  @model_validator(mode="after")
174
255
  def finalize_connections(self) -> Self:
175
256
  for conn in self.connections.values():
176
- conn.finalize_url(self.base_path, self.env_cfg)
257
+ conn.finalize_uri(self.base_path)
177
258
  return self
178
259
 
179
- @property
180
- def settings_obj(self) -> Settings:
181
- return Settings(data=self.settings)
182
-
183
- def get_default_test_set(self, dataset_name: str) -> TestSetsConfig:
260
+ @model_validator(mode="after")
261
+ def validate_authentication_and_scopes(self) -> Self:
262
+ """
263
+ Enforce authentication rules:
264
+ - If authentication.is_required, no dataset may be PUBLIC.
265
+ """
266
+ if self.authentication.enforcement == AuthenticationEnforcement.REQUIRED:
267
+ invalid = [name for name, ds in self.datasets.items() if ds.scope == PermissionScope.PUBLIC]
268
+ if invalid:
269
+ raise ValueError(
270
+ "Authentication is required, so datasets cannot be public. "
271
+ f"Update the scope for datasets: {invalid}"
272
+ )
273
+ return self
274
+
275
+ @model_validator(mode="after")
276
+ def validate_dataset_configurables(self) -> Self:
277
+ """
278
+ Validate that dataset configurables reference valid project-level configurables.
279
+ """
280
+ for dataset_name, dataset_cfg in self.datasets.items():
281
+ for cfg_override in dataset_cfg.configurables:
282
+ if cfg_override.name not in self.configurables:
283
+ raise ValueError(
284
+ f'Dataset "{dataset_name}" references configurable "{cfg_override.name}" which is not defined '
285
+ f'in the project configurables'
286
+ )
287
+ return self
288
+
289
+ def get_default_test_set(self) -> TestSetsConfig:
184
290
  """
185
291
  Raises KeyError if dataset name doesn't exist
186
292
  """
187
- default_name_1 = self.datasets[dataset_name].default_test_set
188
- default_name_2 = self.settings.get(c.TEST_SET_DEFAULT_USED_SETTING, c.DEFAULT_TEST_SET_NAME)
189
- default_name = default_name_1 if default_name_1 else default_name_2
190
- default_test_set = self.selection_test_sets.get(default_name, TestSetsConfig(name=default_name))
293
+ default_default_test_set = TestSetsConfig(name=c.DEFAULT_TEST_SET_NAME)
294
+ default_test_set = self.selection_test_sets.get(c.DEFAULT_TEST_SET_NAME, default_default_test_set)
191
295
  return default_test_set
192
296
 
193
- def get_applicable_test_sets(self, dataset: str) -> list[str]:
194
- applicable_test_sets = []
195
- for test_set_name, test_set_config in self.selection_test_sets.items():
196
- if test_set_config.datasets is None or dataset in test_set_config.datasets:
197
- applicable_test_sets.append(test_set_name)
198
- return applicable_test_sets
297
+ def get_default_configurables(self, dataset_name: str | None = None) -> dict[str, str]:
298
+ """
299
+ Return a dictionary of configurable name to its default value.
300
+
301
+ If dataset_name is provided, merges project-level defaults with dataset-specific overrides.
302
+
303
+ Supports both list- and dict-shaped internal storage for configurables.
304
+ """
305
+ defaults: dict[str, str] = {}
306
+ for name, cfg in self.configurables.items():
307
+ defaults[name] = str(cfg.default)
308
+
309
+ # Apply dataset-specific overrides if dataset_name is provided
310
+ if dataset_name is not None:
311
+ dataset_cfg = self.datasets.get(dataset_name)
312
+ if dataset_cfg:
313
+ for cfg_override in dataset_cfg.configurables:
314
+ defaults[cfg_override.name] = cfg_override.default
315
+
316
+ return defaults
199
317
 
200
318
 
201
319
  class ManifestIO:
202
320
 
203
321
  @classmethod
204
- def load_from_file(cls, logger: _u.Logger, base_path: str, env_cfg: EnvironConfig) -> ManifestConfig:
322
+ def load_from_file(cls, logger: u.Logger, base_path: str, env_vars: dict[str, str]) -> ManifestConfig:
205
323
  start = time.time()
206
324
 
207
- raw_content = _u.read_file(_u.Path(base_path, c.MANIFEST_FILE))
208
- env_vars = env_cfg.get_all_env_vars()
209
- content = _u.render_string(raw_content, base_path=base_path, env_vars=env_vars)
210
- manifest_content = yaml.safe_load(content)
325
+ raw_content = u.read_file(u.Path(base_path, c.MANIFEST_FILE))
326
+ content = u.render_string(raw_content, base_path=base_path, env_vars=env_vars)
327
+ manifest_content: dict[str, Any] = yaml.safe_load(content)
328
+
329
+ auth_cfg: dict[str, Any] = manifest_content.get("authentication", {})
330
+ is_auth_required = bool(auth_cfg.get("is_required", False))
331
+
332
+ if is_auth_required:
333
+ # If authentication is required, assume PROTECTED when scope is not specified
334
+ # while explicitly forbidding PUBLIC (enforced in model validator)
335
+ datasets_raw = manifest_content.get("datasets", [])
336
+ for ds in datasets_raw:
337
+ if isinstance(ds, dict) and "scope" not in ds:
338
+ ds["scope"] = "protected"
339
+
211
340
  try:
212
- manifest_cfg = ManifestConfig(base_path=base_path, env_cfg=env_cfg, **manifest_content)
341
+ manifest_cfg = ManifestConfig(base_path=base_path, **manifest_content)
213
342
  except ValidationError as e:
214
- raise _u.ConfigurationError(f"Failed to process {c.MANIFEST_FILE} file. " + str(e)) from e
343
+ raise u.ConfigurationError(f"Failed to process {c.MANIFEST_FILE} file. " + str(e)) from e
215
344
 
216
345
  logger.log_activity_time(f"loading {c.MANIFEST_FILE} file", start)
217
346
  return manifest_cfg
@@ -0,0 +1,69 @@
1
+ from dataclasses import dataclass, field
2
+ import duckdb, time
3
+
4
+ from . import _utils as u, _connection_set as cs, _models as m
5
+
6
+
7
+ @dataclass
8
+ class ModelBuilder:
9
+ _datalake_db_path: str
10
+ _conn_set: cs.ConnectionSet
11
+ _static_models: dict[str, m.StaticModel]
12
+ _conn_args: cs.ConnectionsArgs = field(default_factory=lambda: cs.ConnectionsArgs(".", {}, {}))
13
+ _logger: u.Logger = field(default_factory=lambda: u.Logger(""))
14
+
15
+ def _attach_connections(self, duckdb_conn: duckdb.DuckDBPyConnection) -> None:
16
+ for conn_name, conn_props in self._conn_set.get_connections_as_dict().items():
17
+ if not isinstance(conn_props, m.ConnectionProperties):
18
+ continue
19
+ attach_uri = conn_props.attach_uri_for_duckdb
20
+ if attach_uri is None:
21
+ continue # skip unsupported dialects
22
+ attach_stmt = f"ATTACH IF NOT EXISTS '{attach_uri}' AS db_{conn_name} (READ_ONLY)"
23
+ u.run_duckdb_stmt(self._logger, duckdb_conn, attach_stmt, redacted_values=[attach_uri])
24
+
25
+ async def _build_models(self, duckdb_conn: duckdb.DuckDBPyConnection, select: str | None, full_refresh: bool) -> None:
26
+ """
27
+ Compile and construct the build models as DuckDB tables.
28
+ """
29
+ # Compile the build models
30
+ models_list = self._static_models.values() if select is None else [self._static_models[select]]
31
+ for model in models_list:
32
+ model.compile_for_build(self._conn_args, self._static_models)
33
+
34
+ # Find all terminal nodes
35
+ terminal_nodes = set()
36
+ if select is None:
37
+ for model in models_list:
38
+ terminal_nodes.update(model.get_terminal_nodes_for_build(set()))
39
+ for model in models_list:
40
+ model.confirmed_no_cycles = False
41
+ else:
42
+ terminal_nodes.add(select)
43
+
44
+ # Run the build models
45
+ coroutines = []
46
+ for model_name in terminal_nodes:
47
+ model = self._static_models[model_name]
48
+ # await model.build_model(duckdb_conn, full_refresh)
49
+ coro = model.build_model(duckdb_conn, full_refresh)
50
+ coroutines.append(coro)
51
+ await u.asyncio_gather(coroutines)
52
+
53
+ async def build(self, full_refresh: bool, select: str | None) -> None:
54
+ start = time.time()
55
+
56
+ # Connect directly to DuckLake instead of attaching (supports concurrent connections)
57
+ duckdb_conn = u.create_duckdb_connection(self._datalake_db_path)
58
+
59
+ try:
60
+ # Attach connections
61
+ self._attach_connections(duckdb_conn)
62
+
63
+ # Construct build models
64
+ await self._build_models(duckdb_conn, select, full_refresh)
65
+
66
+ finally:
67
+ duckdb_conn.close()
68
+
69
+ self._logger.log_activity_time("TOTAL TIME to build the Virtual Data Lake (VDL)", start)
@@ -0,0 +1,74 @@
1
+ from enum import Enum
2
+ from pydantic import BaseModel, Field
3
+
4
+ from . import _constants as c
5
+
6
+
7
+ class ColumnCategory(Enum):
8
+ DIMENSION = "dimension"
9
+ MEASURE = "measure"
10
+ MISC = "misc"
11
+
12
+
13
+ class ColumnConfig(BaseModel):
14
+ name: str = Field(description="The name of the column")
15
+ type: str = Field(default="", description="The type of the column such as 'string', 'integer', 'float', 'boolean', 'datetime', etc.")
16
+ condition: str = Field(default="", description="The condition of when the column is included")
17
+ description: str = Field(default="", description="The description of the column")
18
+ category: ColumnCategory = Field(default=ColumnCategory.MISC, description="The category of the column, either 'dimension', 'measure', or 'misc'")
19
+ depends_on: set[str] = Field(default_factory=set, description="List of dependent columns")
20
+ pass_through: bool = Field(default=False, description="Whether the column should be passed through to the federate")
21
+
22
+
23
+ class ModelConfig(BaseModel):
24
+ description: str = Field(default="", description="The description of the model")
25
+ columns: list[ColumnConfig] = Field(default_factory=list, description="The columns of the model")
26
+
27
+
28
+ class SeedConfig(ModelConfig):
29
+ cast_column_types: bool = Field(default=False, description="Whether the column types should be cast to the appropriate type")
30
+
31
+
32
+ class ConnectionInterface(BaseModel):
33
+ connection: str | None = Field(default=None, description="The connection name of the source model / database view")
34
+
35
+ def finalize_connection(self, env_vars: dict[str, str]):
36
+ if self.connection is None:
37
+ self.connection = env_vars.get(c.SQRL_CONNECTIONS_DEFAULT_NAME_USED, "default")
38
+ return self
39
+
40
+ def get_connection(self) -> str:
41
+ assert self.connection is not None, "Connection must be set"
42
+ return self.connection
43
+
44
+
45
+ class QueryModelConfig(ModelConfig):
46
+ depends_on: set[str] = Field(default_factory=set, description="The dependencies of the model")
47
+
48
+
49
+ class BuildModelConfig(QueryModelConfig):
50
+ materialization: str = Field(default="VIEW", description="The materialization of the model (ignored if Python model which is always a table)")
51
+
52
+ def get_sql_for_build(self, model_name: str, select_query: str) -> str:
53
+ if self.materialization.upper() == "TABLE":
54
+ materialization = "TABLE"
55
+ elif self.materialization.upper() == "VIEW":
56
+ materialization = "VIEW"
57
+ else:
58
+ raise ValueError(f"Invalid materialization: {self.materialization}")
59
+
60
+ create_prefix = f"CREATE OR REPLACE {materialization} {model_name} AS\n\n"
61
+ return create_prefix + select_query
62
+
63
+
64
+ class DbviewModelConfig(ConnectionInterface, QueryModelConfig):
65
+ translate_to_duckdb: bool = Field(default=False, description="Whether to translate the query to DuckDB and use DuckDB tables at runtime")
66
+
67
+
68
+ class FederateModelConfig(QueryModelConfig):
69
+ eager: bool = Field(default=False, description="Whether the model should be materialized for SQL models")
70
+
71
+ def get_sql_for_create(self, model_name: str, select_query: str) -> str:
72
+ materialization = "TABLE" if self.eager else "VIEW"
73
+ create_prefix = f"CREATE {materialization} {model_name} AS\n\n"
74
+ return create_prefix + select_query
@@ -0,0 +1,52 @@
1
+ from abc import ABCMeta
2
+ from dataclasses import dataclass, field
3
+ from typing import Callable, Generic, TypeVar, Any
4
+ import polars as pl, pandas as pd
5
+
6
+ from ._arguments.run_time_args import BuildModelArgs
7
+ from ._model_configs import ModelConfig
8
+
9
+
10
+ # Input query file classes
11
+
12
+ @dataclass(frozen=True)
13
+ class QueryFile(metaclass=ABCMeta):
14
+ filepath: str
15
+ raw_query: Any
16
+
17
+ @dataclass(frozen=True)
18
+ class SqlQueryFile(QueryFile):
19
+ raw_query: str
20
+
21
+ @dataclass(frozen=True)
22
+ class PyQueryFile(QueryFile):
23
+ raw_query: Callable[[BuildModelArgs], pl.LazyFrame | pd.DataFrame]
24
+
25
+
26
+ Q = TypeVar('Q', bound=QueryFile)
27
+ M = TypeVar('M', bound=ModelConfig)
28
+
29
+ @dataclass(frozen=True)
30
+ class QueryFileWithConfig(Generic[Q, M]):
31
+ query_file: Q
32
+ config: M
33
+
34
+
35
+ # Compiled query classes
36
+
37
+ @dataclass
38
+ class Query(metaclass=ABCMeta):
39
+ query: Any
40
+
41
+ @dataclass
42
+ class WorkInProgress(Query):
43
+ query: None = field(default=None, init=False)
44
+
45
+ @dataclass
46
+ class SqlModelQuery(Query):
47
+ query: str
48
+ is_duckdb: bool
49
+
50
+ @dataclass
51
+ class PyModelQuery(Query):
52
+ query: Callable[[], pl.LazyFrame | pd.DataFrame]