squirrels 0.5.0rc0__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of squirrels might be problematic. Click here for more details.
- dateutils/__init__.py +6 -0
- dateutils/_enums.py +25 -0
- squirrels/dateutils.py → dateutils/_implementation.py +58 -111
- dateutils/types.py +6 -0
- squirrels/__init__.py +10 -12
- squirrels/_api_routes/__init__.py +5 -0
- squirrels/_api_routes/auth.py +271 -0
- squirrels/_api_routes/base.py +171 -0
- squirrels/_api_routes/dashboards.py +158 -0
- squirrels/_api_routes/data_management.py +148 -0
- squirrels/_api_routes/datasets.py +265 -0
- squirrels/_api_routes/oauth2.py +298 -0
- squirrels/_api_routes/project.py +252 -0
- squirrels/_api_server.py +245 -781
- squirrels/_arguments/__init__.py +0 -0
- squirrels/{arguments → _arguments}/init_time_args.py +7 -2
- squirrels/{arguments → _arguments}/run_time_args.py +13 -35
- squirrels/_auth.py +720 -212
- squirrels/_command_line.py +81 -41
- squirrels/_compile_prompts.py +147 -0
- squirrels/_connection_set.py +16 -7
- squirrels/_constants.py +29 -9
- squirrels/{_dashboards_io.py → _dashboards.py} +87 -6
- squirrels/_data_sources.py +570 -0
- squirrels/{dataset_result.py → _dataset_types.py} +2 -4
- squirrels/_exceptions.py +9 -37
- squirrels/_initializer.py +83 -59
- squirrels/_logging.py +117 -0
- squirrels/_manifest.py +129 -62
- squirrels/_model_builder.py +10 -52
- squirrels/_model_configs.py +3 -3
- squirrels/_model_queries.py +1 -1
- squirrels/_models.py +249 -118
- squirrels/{package_data → _package_data}/base_project/.env +16 -4
- squirrels/{package_data → _package_data}/base_project/.env.example +15 -3
- squirrels/{package_data → _package_data}/base_project/connections.yml +4 -3
- squirrels/{package_data → _package_data}/base_project/dashboards/dashboard_example.py +4 -4
- squirrels/_package_data/base_project/dashboards/dashboard_example.yml +22 -0
- squirrels/{package_data → _package_data}/base_project/duckdb_init.sql +1 -0
- squirrels/_package_data/base_project/macros/macros_example.sql +17 -0
- squirrels/{package_data → _package_data}/base_project/models/builds/build_example.py +2 -2
- squirrels/{package_data → _package_data}/base_project/models/builds/build_example.sql +1 -1
- squirrels/{package_data → _package_data}/base_project/models/builds/build_example.yml +2 -0
- squirrels/_package_data/base_project/models/dbviews/dbview_example.sql +17 -0
- squirrels/_package_data/base_project/models/dbviews/dbview_example.yml +32 -0
- squirrels/_package_data/base_project/models/federates/federate_example.py +48 -0
- squirrels/_package_data/base_project/models/federates/federate_example.sql +21 -0
- squirrels/{package_data → _package_data}/base_project/models/federates/federate_example.yml +7 -7
- squirrels/{package_data → _package_data}/base_project/models/sources.yml +5 -6
- squirrels/{package_data → _package_data}/base_project/parameters.yml +32 -45
- squirrels/_package_data/base_project/pyconfigs/connections.py +18 -0
- squirrels/{package_data → _package_data}/base_project/pyconfigs/context.py +31 -22
- squirrels/_package_data/base_project/pyconfigs/parameters.py +141 -0
- squirrels/_package_data/base_project/pyconfigs/user.py +44 -0
- squirrels/{package_data → _package_data}/base_project/seeds/seed_categories.yml +1 -1
- squirrels/{package_data → _package_data}/base_project/seeds/seed_subcategories.yml +1 -1
- squirrels/_package_data/base_project/squirrels.yml.j2 +61 -0
- squirrels/_package_data/templates/dataset_results.html +112 -0
- squirrels/_package_data/templates/oauth_login.html +271 -0
- squirrels/_package_data/templates/squirrels_studio.html +20 -0
- squirrels/_parameter_configs.py +76 -55
- squirrels/_parameter_options.py +348 -0
- squirrels/_parameter_sets.py +53 -45
- squirrels/_parameters.py +1664 -0
- squirrels/_project.py +403 -242
- squirrels/_py_module.py +3 -2
- squirrels/_request_context.py +33 -0
- squirrels/_schemas/__init__.py +0 -0
- squirrels/_schemas/auth_models.py +167 -0
- squirrels/_schemas/query_param_models.py +75 -0
- squirrels/{_api_response_models.py → _schemas/response_models.py} +48 -18
- squirrels/_seeds.py +1 -1
- squirrels/_sources.py +23 -19
- squirrels/_utils.py +121 -39
- squirrels/_version.py +1 -1
- squirrels/arguments.py +7 -0
- squirrels/auth.py +4 -0
- squirrels/connections.py +3 -0
- squirrels/dashboards.py +2 -81
- squirrels/data_sources.py +14 -563
- squirrels/parameter_options.py +13 -348
- squirrels/parameters.py +14 -1266
- squirrels/types.py +16 -0
- {squirrels-0.5.0rc0.dist-info → squirrels-0.5.1.dist-info}/METADATA +42 -30
- squirrels-0.5.1.dist-info/RECORD +98 -0
- squirrels/package_data/base_project/dashboards/dashboard_example.yml +0 -22
- squirrels/package_data/base_project/macros/macros_example.sql +0 -15
- squirrels/package_data/base_project/models/dbviews/dbview_example.sql +0 -12
- squirrels/package_data/base_project/models/dbviews/dbview_example.yml +0 -26
- squirrels/package_data/base_project/models/federates/federate_example.py +0 -44
- squirrels/package_data/base_project/models/federates/federate_example.sql +0 -17
- squirrels/package_data/base_project/pyconfigs/connections.py +0 -14
- squirrels/package_data/base_project/pyconfigs/parameters.py +0 -93
- squirrels/package_data/base_project/pyconfigs/user.py +0 -23
- squirrels/package_data/base_project/squirrels.yml.j2 +0 -71
- squirrels-0.5.0rc0.dist-info/RECORD +0 -70
- /squirrels/{package_data → _package_data}/base_project/assets/expenses.db +0 -0
- /squirrels/{package_data → _package_data}/base_project/assets/weather.db +0 -0
- /squirrels/{package_data → _package_data}/base_project/docker/.dockerignore +0 -0
- /squirrels/{package_data → _package_data}/base_project/docker/Dockerfile +0 -0
- /squirrels/{package_data → _package_data}/base_project/docker/compose.yml +0 -0
- /squirrels/{package_data/base_project/.gitignore → _package_data/base_project/gitignore} +0 -0
- /squirrels/{package_data → _package_data}/base_project/seeds/seed_categories.csv +0 -0
- /squirrels/{package_data → _package_data}/base_project/seeds/seed_subcategories.csv +0 -0
- /squirrels/{package_data → _package_data}/base_project/tmp/.gitignore +0 -0
- {squirrels-0.5.0rc0.dist-info → squirrels-0.5.1.dist-info}/WHEEL +0 -0
- {squirrels-0.5.0rc0.dist-info → squirrels-0.5.1.dist-info}/entry_points.txt +0 -0
- {squirrels-0.5.0rc0.dist-info → squirrels-0.5.1.dist-info}/licenses/LICENSE +0 -0
squirrels/_manifest.py
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
from functools import cached_property
|
|
2
|
-
from typing import Any
|
|
2
|
+
from typing import Literal, Any
|
|
3
3
|
from urllib.parse import urlparse
|
|
4
4
|
from sqlalchemy import Engine, create_engine
|
|
5
5
|
from typing_extensions import Self
|
|
6
6
|
from enum import Enum
|
|
7
7
|
from pydantic import BaseModel, Field, field_validator, model_validator, ValidationInfo, ValidationError
|
|
8
|
-
import yaml, time
|
|
8
|
+
import yaml, time, re
|
|
9
9
|
|
|
10
10
|
from . import _constants as c, _utils as u
|
|
11
11
|
|
|
@@ -16,10 +16,17 @@ class ProjectVarsConfig(BaseModel, extra="allow"):
|
|
|
16
16
|
description: str = ""
|
|
17
17
|
major_version: int
|
|
18
18
|
|
|
19
|
+
@field_validator("name")
|
|
20
|
+
@classmethod
|
|
21
|
+
def validate_name(cls, v: str) -> str:
|
|
22
|
+
if not re.fullmatch(r"[A-Za-z0-9_-]+", v):
|
|
23
|
+
raise ValueError("Project name must only contain alphanumeric characters, underscores, and dashes.")
|
|
24
|
+
return v
|
|
25
|
+
|
|
19
26
|
@model_validator(mode="after")
|
|
20
27
|
def finalize_label(self) -> Self:
|
|
21
28
|
if self.label == "":
|
|
22
|
-
self.label = self.name
|
|
29
|
+
self.label = u.to_title_case(self.name)
|
|
23
30
|
return self
|
|
24
31
|
|
|
25
32
|
|
|
@@ -39,10 +46,11 @@ class _ConfigWithNameBaseModel(BaseModel):
|
|
|
39
46
|
name: str
|
|
40
47
|
|
|
41
48
|
|
|
42
|
-
class
|
|
49
|
+
class ConnectionTypeEnum(Enum):
|
|
43
50
|
SQLALCHEMY = "sqlalchemy"
|
|
44
51
|
CONNECTORX = "connectorx"
|
|
45
52
|
ADBC = "adbc"
|
|
53
|
+
DUCKDB = "duckdb"
|
|
46
54
|
|
|
47
55
|
|
|
48
56
|
class ConnectionProperties(BaseModel):
|
|
@@ -54,7 +62,7 @@ class ConnectionProperties(BaseModel):
|
|
|
54
62
|
uri: The URI for the connection
|
|
55
63
|
"""
|
|
56
64
|
label: str | None = None
|
|
57
|
-
type:
|
|
65
|
+
type: ConnectionTypeEnum = Field(default=ConnectionTypeEnum.SQLALCHEMY)
|
|
58
66
|
uri: str
|
|
59
67
|
sa_create_engine_args: dict[str, Any] = Field(default_factory=dict)
|
|
60
68
|
|
|
@@ -64,33 +72,39 @@ class ConnectionProperties(BaseModel):
|
|
|
64
72
|
Creates and caches a SQLAlchemy engine if the connection type is sqlalchemy.
|
|
65
73
|
Raises ValueError for other connection types.
|
|
66
74
|
"""
|
|
67
|
-
if self.type ==
|
|
75
|
+
if self.type == ConnectionTypeEnum.SQLALCHEMY:
|
|
68
76
|
return create_engine(self.uri, **self.sa_create_engine_args)
|
|
69
77
|
else:
|
|
70
78
|
raise ValueError(f'Connection type "{self.type}" does not support engine property')
|
|
71
79
|
|
|
72
80
|
@cached_property
|
|
73
81
|
def dialect(self) -> str:
|
|
74
|
-
|
|
82
|
+
default_dialect = None
|
|
83
|
+
if self.type == ConnectionTypeEnum.SQLALCHEMY:
|
|
75
84
|
dialect = self.engine.dialect.name
|
|
85
|
+
elif self.type == ConnectionTypeEnum.DUCKDB:
|
|
86
|
+
dialect = self.uri.split(':')[0]
|
|
87
|
+
default_dialect = 'duckdb'
|
|
76
88
|
else:
|
|
77
89
|
url = urlparse(self.uri)
|
|
78
90
|
dialect = url.scheme
|
|
79
91
|
|
|
80
|
-
processed_dialect = next((d for d in ['sqlite', 'postgres', 'mysql'] if dialect.lower().startswith(d)),
|
|
92
|
+
processed_dialect = next((d for d in ['sqlite', 'postgres', 'mysql', 'duckdb'] if dialect.lower().startswith(d)), default_dialect)
|
|
81
93
|
dialect = processed_dialect if processed_dialect is not None else dialect
|
|
82
94
|
return dialect
|
|
83
95
|
|
|
84
96
|
@cached_property
|
|
85
97
|
def attach_uri_for_duckdb(self) -> str | None:
|
|
86
|
-
if self.type ==
|
|
98
|
+
if self.type == ConnectionTypeEnum.DUCKDB:
|
|
99
|
+
return self.uri
|
|
100
|
+
elif self.type == ConnectionTypeEnum.SQLALCHEMY:
|
|
87
101
|
url = self.engine.url
|
|
88
102
|
host = url.host
|
|
89
103
|
port = url.port
|
|
90
104
|
username = url.username
|
|
91
105
|
password = url.password
|
|
92
106
|
database = url.database
|
|
93
|
-
|
|
107
|
+
database_as_file = database if database is not None else ""
|
|
94
108
|
else:
|
|
95
109
|
url = urlparse(self.uri)
|
|
96
110
|
host = url.hostname
|
|
@@ -98,14 +112,18 @@ class ConnectionProperties(BaseModel):
|
|
|
98
112
|
username = url.username
|
|
99
113
|
password = url.password
|
|
100
114
|
database = url.path.lstrip('/')
|
|
101
|
-
|
|
115
|
+
database_as_file = self.uri.replace(f"{self.dialect}://", "")
|
|
102
116
|
|
|
103
|
-
if self.dialect
|
|
104
|
-
|
|
105
|
-
elif self.dialect
|
|
106
|
-
|
|
117
|
+
if self.dialect in ('postgres', 'mysql'):
|
|
118
|
+
attach_uri = f"{self.dialect}:dbname={database} user={username} password={password} host={host} port={port}"
|
|
119
|
+
elif self.dialect == "sqlite":
|
|
120
|
+
attach_uri = f"{self.dialect}:{database_as_file}"
|
|
121
|
+
elif self.dialect == "duckdb":
|
|
122
|
+
attach_uri = database_as_file
|
|
107
123
|
else:
|
|
108
|
-
|
|
124
|
+
attach_uri = None
|
|
125
|
+
|
|
126
|
+
return attach_uri
|
|
109
127
|
|
|
110
128
|
|
|
111
129
|
class DbConnConfig(ConnectionProperties, _ConfigWithNameBaseModel):
|
|
@@ -114,6 +132,16 @@ class DbConnConfig(ConnectionProperties, _ConfigWithNameBaseModel):
|
|
|
114
132
|
return self
|
|
115
133
|
|
|
116
134
|
|
|
135
|
+
class DatasetConfigurablesConfig(BaseModel):
|
|
136
|
+
name: str
|
|
137
|
+
default: str
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
class ConfigurablesConfig(DatasetConfigurablesConfig):
|
|
141
|
+
label: str = ""
|
|
142
|
+
description: str = ""
|
|
143
|
+
|
|
144
|
+
|
|
117
145
|
class ParametersConfig(BaseModel):
|
|
118
146
|
type: str
|
|
119
147
|
factory: str
|
|
@@ -126,6 +154,20 @@ class PermissionScope(Enum):
|
|
|
126
154
|
PRIVATE = 2
|
|
127
155
|
|
|
128
156
|
|
|
157
|
+
class AuthenticationEnforcement(Enum):
|
|
158
|
+
REQUIRED = "required"
|
|
159
|
+
OPTIONAL = "optional"
|
|
160
|
+
DISABLED = "disabled"
|
|
161
|
+
|
|
162
|
+
class AuthenticationType(Enum):
|
|
163
|
+
MANAGED = "managed"
|
|
164
|
+
EXTERNAL = "external"
|
|
165
|
+
|
|
166
|
+
class AuthenticationConfig(BaseModel):
|
|
167
|
+
enforcement: AuthenticationEnforcement = AuthenticationEnforcement.OPTIONAL
|
|
168
|
+
type: AuthenticationType = AuthenticationType.MANAGED
|
|
169
|
+
|
|
170
|
+
|
|
129
171
|
class AnalyticsOutputConfig(_ConfigWithNameBaseModel):
|
|
130
172
|
label: str = ""
|
|
131
173
|
description: str = ""
|
|
@@ -149,14 +191,9 @@ class AnalyticsOutputConfig(_ConfigWithNameBaseModel):
|
|
|
149
191
|
raise ValueError(f'Scope "{value}" is invalid for dataset/dashboard "{name}". Scope must be one of {scope_list}') from e
|
|
150
192
|
|
|
151
193
|
|
|
152
|
-
class DatasetTraitConfig(_ConfigWithNameBaseModel):
|
|
153
|
-
default: Any
|
|
154
|
-
|
|
155
|
-
|
|
156
194
|
class DatasetConfig(AnalyticsOutputConfig):
|
|
157
195
|
model: str = ""
|
|
158
|
-
|
|
159
|
-
default_test_set: str = ""
|
|
196
|
+
configurables: list[DatasetConfigurablesConfig] = Field(default_factory=list)
|
|
160
197
|
|
|
161
198
|
def __hash__(self) -> int:
|
|
162
199
|
return hash("dataset_"+self.name)
|
|
@@ -168,23 +205,24 @@ class DatasetConfig(AnalyticsOutputConfig):
|
|
|
168
205
|
return self
|
|
169
206
|
|
|
170
207
|
|
|
208
|
+
class TestSetsUserConfig(BaseModel):
|
|
209
|
+
access_level: Literal["admin", "member", "guest"] = "guest"
|
|
210
|
+
custom_fields: dict[str, Any] = Field(default_factory=dict)
|
|
211
|
+
|
|
171
212
|
class TestSetsConfig(_ConfigWithNameBaseModel):
|
|
172
|
-
|
|
173
|
-
user_attributes: dict[str, Any] | None = None
|
|
213
|
+
user: TestSetsUserConfig = Field(default_factory=TestSetsUserConfig)
|
|
174
214
|
parameters: dict[str, Any] = Field(default_factory=dict)
|
|
175
|
-
|
|
176
|
-
@property
|
|
177
|
-
def is_authenticated(self) -> bool:
|
|
178
|
-
return self.user_attributes is not None
|
|
215
|
+
configurables: dict[str, Any] = Field(default_factory=dict)
|
|
179
216
|
|
|
180
217
|
|
|
181
218
|
class ManifestConfig(BaseModel):
|
|
182
219
|
project_variables: ProjectVarsConfig
|
|
220
|
+
authentication: AuthenticationConfig = Field(default_factory=AuthenticationConfig)
|
|
183
221
|
packages: list[PackageConfig] = Field(default_factory=list)
|
|
184
222
|
connections: dict[str, DbConnConfig] = Field(default_factory=dict)
|
|
185
223
|
parameters: list[ParametersConfig] = Field(default_factory=list)
|
|
224
|
+
configurables: dict[str, ConfigurablesConfig] = Field(default_factory=dict)
|
|
186
225
|
selection_test_sets: dict[str, TestSetsConfig] = Field(default_factory=dict)
|
|
187
|
-
dataset_traits: dict[str, DatasetTraitConfig] = Field(default_factory=dict)
|
|
188
226
|
datasets: dict[str, DatasetConfig] = Field(default_factory=dict)
|
|
189
227
|
base_path: str = "."
|
|
190
228
|
env_vars: dict[str, str] = Field(default_factory=dict)
|
|
@@ -199,13 +237,13 @@ class ManifestConfig(BaseModel):
|
|
|
199
237
|
set_of_directories.add(package.directory)
|
|
200
238
|
return packages
|
|
201
239
|
|
|
202
|
-
@field_validator("connections", "selection_test_sets", "
|
|
240
|
+
@field_validator("connections", "selection_test_sets", "datasets", "configurables", mode="before")
|
|
203
241
|
@classmethod
|
|
204
242
|
def names_are_unique(cls, values: list[dict] | dict[str, dict], info: ValidationInfo) -> dict[str, dict]:
|
|
205
243
|
if isinstance(values, list):
|
|
206
244
|
values_as_dict = {}
|
|
207
245
|
for obj in values:
|
|
208
|
-
name = obj["name"]
|
|
246
|
+
name = u.normalize_name(obj["name"])
|
|
209
247
|
if name in values_as_dict:
|
|
210
248
|
raise ValueError(f'In the {info.field_name} section, the name "{name}" was specified multiple times')
|
|
211
249
|
values_as_dict[name] = obj
|
|
@@ -220,45 +258,62 @@ class ManifestConfig(BaseModel):
|
|
|
220
258
|
return self
|
|
221
259
|
|
|
222
260
|
@model_validator(mode="after")
|
|
223
|
-
def
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
261
|
+
def validate_authentication_and_scopes(self) -> Self:
|
|
262
|
+
"""
|
|
263
|
+
Enforce authentication rules:
|
|
264
|
+
- If authentication.enforcement is REQUIRED, no dataset may be PUBLIC.
|
|
265
|
+
"""
|
|
266
|
+
if self.authentication.enforcement == AuthenticationEnforcement.REQUIRED:
|
|
267
|
+
invalid = [name for name, ds in self.datasets.items() if ds.scope == PermissionScope.PUBLIC]
|
|
268
|
+
if invalid:
|
|
269
|
+
raise ValueError(
|
|
270
|
+
"Authentication is required, so datasets cannot be public. "
|
|
271
|
+
f"Update the scope for datasets: {invalid}"
|
|
272
|
+
)
|
|
273
|
+
return self
|
|
274
|
+
|
|
275
|
+
@model_validator(mode="after")
|
|
276
|
+
def validate_dataset_configurables(self) -> Self:
|
|
277
|
+
"""
|
|
278
|
+
Validate that dataset configurables reference valid project-level configurables.
|
|
279
|
+
"""
|
|
280
|
+
for dataset_name, dataset_cfg in self.datasets.items():
|
|
281
|
+
for cfg_override in dataset_cfg.configurables:
|
|
282
|
+
if cfg_override.name not in self.configurables:
|
|
228
283
|
raise ValueError(
|
|
229
|
-
f'Dataset "{dataset_name}" references
|
|
230
|
-
f'
|
|
284
|
+
f'Dataset "{dataset_name}" references configurable "{cfg_override.name}" which is not defined '
|
|
285
|
+
f'in the project configurables'
|
|
231
286
|
)
|
|
232
|
-
|
|
233
|
-
# Set default values for any traits that are missing
|
|
234
|
-
for trait_name, trait_config in self.dataset_traits.items():
|
|
235
|
-
if trait_name not in dataset.traits:
|
|
236
|
-
dataset.traits[trait_name] = trait_config.default
|
|
237
|
-
|
|
238
287
|
return self
|
|
239
288
|
|
|
240
|
-
def get_default_test_set(self
|
|
289
|
+
def get_default_test_set(self) -> TestSetsConfig:
|
|
241
290
|
"""
|
|
242
291
|
Returns the test set named c.DEFAULT_TEST_SET_NAME, or a default-constructed test set with that name if none is defined
|
|
243
292
|
"""
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
default_name = default_name_1 if default_name_1 else default_name_2
|
|
247
|
-
default_test_set = self.selection_test_sets.get(default_name, TestSetsConfig(name=default_name))
|
|
293
|
+
default_default_test_set = TestSetsConfig(name=c.DEFAULT_TEST_SET_NAME)
|
|
294
|
+
default_test_set = self.selection_test_sets.get(c.DEFAULT_TEST_SET_NAME, default_default_test_set)
|
|
248
295
|
return default_test_set
|
|
249
296
|
|
|
250
|
-
def
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
for
|
|
260
|
-
|
|
261
|
-
|
|
297
|
+
def get_default_configurables(self, dataset_name: str | None = None) -> dict[str, str]:
|
|
298
|
+
"""
|
|
299
|
+
Return a dictionary of configurable name to its default value.
|
|
300
|
+
|
|
301
|
+
If dataset_name is provided, merges project-level defaults with dataset-specific overrides.
|
|
302
|
+
|
|
303
|
+
Project-level configurables are stored as a dict keyed by name; dataset-level overrides are stored as a list.
|
|
304
|
+
"""
|
|
305
|
+
defaults: dict[str, str] = {}
|
|
306
|
+
for name, cfg in self.configurables.items():
|
|
307
|
+
defaults[name] = str(cfg.default)
|
|
308
|
+
|
|
309
|
+
# Apply dataset-specific overrides if dataset_name is provided
|
|
310
|
+
if dataset_name is not None:
|
|
311
|
+
dataset_cfg = self.datasets.get(dataset_name)
|
|
312
|
+
if dataset_cfg:
|
|
313
|
+
for cfg_override in dataset_cfg.configurables:
|
|
314
|
+
defaults[cfg_override.name] = cfg_override.default
|
|
315
|
+
|
|
316
|
+
return defaults
|
|
262
317
|
|
|
263
318
|
|
|
264
319
|
class ManifestIO:
|
|
@@ -269,7 +324,19 @@ class ManifestIO:
|
|
|
269
324
|
|
|
270
325
|
raw_content = u.read_file(u.Path(base_path, c.MANIFEST_FILE))
|
|
271
326
|
content = u.render_string(raw_content, base_path=base_path, env_vars=env_vars)
|
|
272
|
-
manifest_content = yaml.safe_load(content)
|
|
327
|
+
manifest_content: dict[str, Any] = yaml.safe_load(content)
|
|
328
|
+
|
|
329
|
+
auth_cfg: dict[str, Any] = manifest_content.get("authentication", {})
|
|
330
|
+
is_auth_required = bool(auth_cfg.get("is_required", False))
|
|
331
|
+
|
|
332
|
+
if is_auth_required:
|
|
333
|
+
# If authentication is required, assume PROTECTED when scope is not specified
|
|
334
|
+
# while explicitly forbidding PUBLIC (enforced in model validator)
|
|
335
|
+
datasets_raw = manifest_content.get("datasets", [])
|
|
336
|
+
for ds in datasets_raw:
|
|
337
|
+
if isinstance(ds, dict) and "scope" not in ds:
|
|
338
|
+
ds["scope"] = "protected"
|
|
339
|
+
|
|
273
340
|
try:
|
|
274
341
|
manifest_cfg = ManifestConfig(base_path=base_path, **manifest_content)
|
|
275
342
|
except ValidationError as e:
|
squirrels/_model_builder.py
CHANGED
|
@@ -1,31 +1,26 @@
|
|
|
1
1
|
from dataclasses import dataclass, field
|
|
2
|
-
import
|
|
2
|
+
import duckdb, time
|
|
3
3
|
|
|
4
4
|
from . import _utils as u, _connection_set as cs, _models as m
|
|
5
|
-
from ._exceptions import InvalidInputError
|
|
6
5
|
|
|
7
6
|
|
|
8
7
|
@dataclass
|
|
9
8
|
class ModelBuilder:
|
|
10
|
-
|
|
9
|
+
_datalake_db_path: str
|
|
11
10
|
_conn_set: cs.ConnectionSet
|
|
12
11
|
_static_models: dict[str, m.StaticModel]
|
|
13
12
|
_conn_args: cs.ConnectionsArgs = field(default_factory=lambda: cs.ConnectionsArgs(".", {}, {}))
|
|
14
13
|
_logger: u.Logger = field(default_factory=lambda: u.Logger(""))
|
|
15
14
|
|
|
16
|
-
def _attach_connections(self, duckdb_conn: duckdb.DuckDBPyConnection) ->
|
|
17
|
-
dialect_by_conn_name: dict[str, str] = {}
|
|
15
|
+
def _attach_connections(self, duckdb_conn: duckdb.DuckDBPyConnection) -> None:
|
|
18
16
|
for conn_name, conn_props in self._conn_set.get_connections_as_dict().items():
|
|
19
17
|
if not isinstance(conn_props, m.ConnectionProperties):
|
|
20
18
|
continue
|
|
21
|
-
dialect = conn_props.dialect
|
|
22
19
|
attach_uri = conn_props.attach_uri_for_duckdb
|
|
23
20
|
if attach_uri is None:
|
|
24
21
|
continue # skip unsupported dialects
|
|
25
|
-
attach_stmt = f"ATTACH IF NOT EXISTS '{attach_uri}' AS db_{conn_name} (
|
|
22
|
+
attach_stmt = f"ATTACH IF NOT EXISTS '{attach_uri}' AS db_{conn_name} (READ_ONLY)"
|
|
26
23
|
u.run_duckdb_stmt(self._logger, duckdb_conn, attach_stmt, redacted_values=[attach_uri])
|
|
27
|
-
dialect_by_conn_name[conn_name] = dialect
|
|
28
|
-
return dialect_by_conn_name
|
|
29
24
|
|
|
30
25
|
async def _build_models(self, duckdb_conn: duckdb.DuckDBPyConnection, select: str | None, full_refresh: bool) -> None:
|
|
31
26
|
"""
|
|
@@ -50,62 +45,25 @@ class ModelBuilder:
|
|
|
50
45
|
coroutines = []
|
|
51
46
|
for model_name in terminal_nodes:
|
|
52
47
|
model = self._static_models[model_name]
|
|
48
|
+
# await model.build_model(duckdb_conn, full_refresh)
|
|
53
49
|
coro = model.build_model(duckdb_conn, full_refresh)
|
|
54
50
|
coroutines.append(coro)
|
|
55
51
|
await u.asyncio_gather(coroutines)
|
|
56
52
|
|
|
57
|
-
async def build(self, full_refresh: bool, select: str | None
|
|
53
|
+
async def build(self, full_refresh: bool, select: str | None) -> None:
|
|
58
54
|
start = time.time()
|
|
59
55
|
|
|
60
|
-
#
|
|
61
|
-
|
|
62
|
-
duckdb_path.parent.mkdir(parents=True, exist_ok=True)
|
|
63
|
-
|
|
64
|
-
# Delete any existing DuckDB file if full refresh is requested
|
|
65
|
-
duckdb_dev_path = u.Path(self._duckdb_venv_path + ".dev")
|
|
66
|
-
duckdb_stg_path = u.Path(self._duckdb_venv_path + ".stg")
|
|
67
|
-
|
|
68
|
-
# If the development copy is already in use, a concurrent build is not allowed
|
|
69
|
-
duckdb_dev_lock_path = u.Path(self._duckdb_venv_path + ".dev.lock")
|
|
70
|
-
if duckdb_dev_lock_path.exists():
|
|
71
|
-
raise InvalidInputError(60, "An existing build process is already running and a concurrent build is not allowed")
|
|
72
|
-
duckdb_dev_lock_path.touch(exist_ok=False)
|
|
73
|
-
|
|
74
|
-
# Ensure the lock file is deleted even if an exception is raised
|
|
75
|
-
try:
|
|
76
|
-
# If not full refresh, create a development copy of the existing virtual data environment
|
|
77
|
-
if not full_refresh:
|
|
78
|
-
if duckdb_stg_path.exists():
|
|
79
|
-
duckdb_stg_path.replace(duckdb_dev_path)
|
|
80
|
-
elif duckdb_path.exists():
|
|
81
|
-
shutil.copy(duckdb_path, duckdb_dev_path)
|
|
82
|
-
|
|
83
|
-
self._logger.log_activity_time("creating development copy of virtual data environment", start)
|
|
84
|
-
|
|
85
|
-
# Connect to DuckDB file
|
|
86
|
-
duckdb_conn = u.create_duckdb_connection(duckdb_dev_path)
|
|
87
|
-
|
|
88
|
-
except Exception:
|
|
89
|
-
duckdb_dev_lock_path.unlink()
|
|
90
|
-
raise
|
|
56
|
+
# Connect directly to DuckLake instead of attaching (supports concurrent connections)
|
|
57
|
+
duckdb_conn = u.create_duckdb_connection(self._datalake_db_path)
|
|
91
58
|
|
|
92
|
-
# Sometimes code after conn.close() doesn't run (as if the python process is killed but no error is raised)
|
|
93
|
-
# Using a new try block to ensure the lock file is removed before closing the connection
|
|
94
59
|
try:
|
|
95
60
|
# Attach connections
|
|
96
61
|
self._attach_connections(duckdb_conn)
|
|
97
62
|
|
|
98
63
|
# Construct build models
|
|
99
64
|
await self._build_models(duckdb_conn, select, full_refresh)
|
|
100
|
-
|
|
65
|
+
|
|
101
66
|
finally:
|
|
102
|
-
duckdb_dev_lock_path.unlink()
|
|
103
67
|
duckdb_conn.close()
|
|
104
68
|
|
|
105
|
-
|
|
106
|
-
if stage_file:
|
|
107
|
-
duckdb_dev_path.replace(duckdb_stg_path)
|
|
108
|
-
else:
|
|
109
|
-
duckdb_dev_path.replace(duckdb_path)
|
|
110
|
-
|
|
111
|
-
self._logger.log_activity_time("TOTAL TIME to build virtual data environment", start)
|
|
69
|
+
self._logger.log_activity_time("TOTAL TIME to build the Virtual Data Lake (VDL)", start)
|
squirrels/_model_configs.py
CHANGED
|
@@ -47,7 +47,7 @@ class QueryModelConfig(ModelConfig):
|
|
|
47
47
|
|
|
48
48
|
|
|
49
49
|
class BuildModelConfig(QueryModelConfig):
|
|
50
|
-
materialization: str = Field(default="
|
|
50
|
+
materialization: str = Field(default="VIEW", description="The materialization of the model (ignored if Python model which is always a table)")
|
|
51
51
|
|
|
52
52
|
def get_sql_for_build(self, model_name: str, select_query: str) -> str:
|
|
53
53
|
if self.materialization.upper() == "TABLE":
|
|
@@ -57,7 +57,7 @@ class BuildModelConfig(QueryModelConfig):
|
|
|
57
57
|
else:
|
|
58
58
|
raise ValueError(f"Invalid materialization: {self.materialization}")
|
|
59
59
|
|
|
60
|
-
create_prefix = f"CREATE OR REPLACE {materialization} {model_name} AS\n"
|
|
60
|
+
create_prefix = f"CREATE OR REPLACE {materialization} {model_name} AS\n\n"
|
|
61
61
|
return create_prefix + select_query
|
|
62
62
|
|
|
63
63
|
|
|
@@ -70,5 +70,5 @@ class FederateModelConfig(QueryModelConfig):
|
|
|
70
70
|
|
|
71
71
|
def get_sql_for_create(self, model_name: str, select_query: str) -> str:
|
|
72
72
|
materialization = "TABLE" if self.eager else "VIEW"
|
|
73
|
-
create_prefix = f"CREATE {materialization} {model_name} AS\n"
|
|
73
|
+
create_prefix = f"CREATE {materialization} {model_name} AS\n\n"
|
|
74
74
|
return create_prefix + select_query
|
squirrels/_model_queries.py
CHANGED
|
@@ -3,7 +3,7 @@ from dataclasses import dataclass, field
|
|
|
3
3
|
from typing import Callable, Generic, TypeVar, Any
|
|
4
4
|
import polars as pl, pandas as pd
|
|
5
5
|
|
|
6
|
-
from .
|
|
6
|
+
from ._arguments.run_time_args import BuildModelArgs
|
|
7
7
|
from ._model_configs import ModelConfig
|
|
8
8
|
|
|
9
9
|
|