squirrels-0.5.0rc0-py3-none-any.whl → squirrels-0.5.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Note: this release of squirrels might be problematic.

Files changed (108)
  1. dateutils/__init__.py +6 -0
  2. dateutils/_enums.py +25 -0
  3. squirrels/dateutils.py → dateutils/_implementation.py +58 -111
  4. dateutils/types.py +6 -0
  5. squirrels/__init__.py +10 -12
  6. squirrels/_api_routes/__init__.py +5 -0
  7. squirrels/_api_routes/auth.py +271 -0
  8. squirrels/_api_routes/base.py +171 -0
  9. squirrels/_api_routes/dashboards.py +158 -0
  10. squirrels/_api_routes/data_management.py +148 -0
  11. squirrels/_api_routes/datasets.py +265 -0
  12. squirrels/_api_routes/oauth2.py +298 -0
  13. squirrels/_api_routes/project.py +252 -0
  14. squirrels/_api_server.py +245 -781
  15. squirrels/_arguments/__init__.py +0 -0
  16. squirrels/{arguments → _arguments}/init_time_args.py +7 -2
  17. squirrels/{arguments → _arguments}/run_time_args.py +13 -35
  18. squirrels/_auth.py +720 -212
  19. squirrels/_command_line.py +81 -41
  20. squirrels/_compile_prompts.py +147 -0
  21. squirrels/_connection_set.py +16 -7
  22. squirrels/_constants.py +29 -9
  23. squirrels/{_dashboards_io.py → _dashboards.py} +87 -6
  24. squirrels/_data_sources.py +570 -0
  25. squirrels/{dataset_result.py → _dataset_types.py} +2 -4
  26. squirrels/_exceptions.py +9 -37
  27. squirrels/_initializer.py +83 -59
  28. squirrels/_logging.py +117 -0
  29. squirrels/_manifest.py +129 -62
  30. squirrels/_model_builder.py +10 -52
  31. squirrels/_model_configs.py +3 -3
  32. squirrels/_model_queries.py +1 -1
  33. squirrels/_models.py +249 -118
  34. squirrels/{package_data → _package_data}/base_project/.env +16 -4
  35. squirrels/{package_data → _package_data}/base_project/.env.example +15 -3
  36. squirrels/{package_data → _package_data}/base_project/connections.yml +4 -3
  37. squirrels/{package_data → _package_data}/base_project/dashboards/dashboard_example.py +4 -4
  38. squirrels/_package_data/base_project/dashboards/dashboard_example.yml +22 -0
  39. squirrels/{package_data → _package_data}/base_project/duckdb_init.sql +1 -0
  40. squirrels/_package_data/base_project/macros/macros_example.sql +17 -0
  41. squirrels/{package_data → _package_data}/base_project/models/builds/build_example.py +2 -2
  42. squirrels/{package_data → _package_data}/base_project/models/builds/build_example.sql +1 -1
  43. squirrels/{package_data → _package_data}/base_project/models/builds/build_example.yml +2 -0
  44. squirrels/_package_data/base_project/models/dbviews/dbview_example.sql +17 -0
  45. squirrels/_package_data/base_project/models/dbviews/dbview_example.yml +32 -0
  46. squirrels/_package_data/base_project/models/federates/federate_example.py +48 -0
  47. squirrels/_package_data/base_project/models/federates/federate_example.sql +21 -0
  48. squirrels/{package_data → _package_data}/base_project/models/federates/federate_example.yml +7 -7
  49. squirrels/{package_data → _package_data}/base_project/models/sources.yml +5 -6
  50. squirrels/{package_data → _package_data}/base_project/parameters.yml +32 -45
  51. squirrels/_package_data/base_project/pyconfigs/connections.py +18 -0
  52. squirrels/{package_data → _package_data}/base_project/pyconfigs/context.py +31 -22
  53. squirrels/_package_data/base_project/pyconfigs/parameters.py +141 -0
  54. squirrels/_package_data/base_project/pyconfigs/user.py +44 -0
  55. squirrels/{package_data → _package_data}/base_project/seeds/seed_categories.yml +1 -1
  56. squirrels/{package_data → _package_data}/base_project/seeds/seed_subcategories.yml +1 -1
  57. squirrels/_package_data/base_project/squirrels.yml.j2 +61 -0
  58. squirrels/_package_data/templates/dataset_results.html +112 -0
  59. squirrels/_package_data/templates/oauth_login.html +271 -0
  60. squirrels/_package_data/templates/squirrels_studio.html +20 -0
  61. squirrels/_parameter_configs.py +76 -55
  62. squirrels/_parameter_options.py +348 -0
  63. squirrels/_parameter_sets.py +53 -45
  64. squirrels/_parameters.py +1664 -0
  65. squirrels/_project.py +403 -242
  66. squirrels/_py_module.py +3 -2
  67. squirrels/_request_context.py +33 -0
  68. squirrels/_schemas/__init__.py +0 -0
  69. squirrels/_schemas/auth_models.py +167 -0
  70. squirrels/_schemas/query_param_models.py +75 -0
  71. squirrels/{_api_response_models.py → _schemas/response_models.py} +48 -18
  72. squirrels/_seeds.py +1 -1
  73. squirrels/_sources.py +23 -19
  74. squirrels/_utils.py +121 -39
  75. squirrels/_version.py +1 -1
  76. squirrels/arguments.py +7 -0
  77. squirrels/auth.py +4 -0
  78. squirrels/connections.py +3 -0
  79. squirrels/dashboards.py +2 -81
  80. squirrels/data_sources.py +14 -563
  81. squirrels/parameter_options.py +13 -348
  82. squirrels/parameters.py +14 -1266
  83. squirrels/types.py +16 -0
  84. {squirrels-0.5.0rc0.dist-info → squirrels-0.5.1.dist-info}/METADATA +42 -30
  85. squirrels-0.5.1.dist-info/RECORD +98 -0
  86. squirrels/package_data/base_project/dashboards/dashboard_example.yml +0 -22
  87. squirrels/package_data/base_project/macros/macros_example.sql +0 -15
  88. squirrels/package_data/base_project/models/dbviews/dbview_example.sql +0 -12
  89. squirrels/package_data/base_project/models/dbviews/dbview_example.yml +0 -26
  90. squirrels/package_data/base_project/models/federates/federate_example.py +0 -44
  91. squirrels/package_data/base_project/models/federates/federate_example.sql +0 -17
  92. squirrels/package_data/base_project/pyconfigs/connections.py +0 -14
  93. squirrels/package_data/base_project/pyconfigs/parameters.py +0 -93
  94. squirrels/package_data/base_project/pyconfigs/user.py +0 -23
  95. squirrels/package_data/base_project/squirrels.yml.j2 +0 -71
  96. squirrels-0.5.0rc0.dist-info/RECORD +0 -70
  97. /squirrels/{package_data → _package_data}/base_project/assets/expenses.db +0 -0
  98. /squirrels/{package_data → _package_data}/base_project/assets/weather.db +0 -0
  99. /squirrels/{package_data → _package_data}/base_project/docker/.dockerignore +0 -0
  100. /squirrels/{package_data → _package_data}/base_project/docker/Dockerfile +0 -0
  101. /squirrels/{package_data → _package_data}/base_project/docker/compose.yml +0 -0
  102. /squirrels/{package_data/base_project/.gitignore → _package_data/base_project/gitignore} +0 -0
  103. /squirrels/{package_data → _package_data}/base_project/seeds/seed_categories.csv +0 -0
  104. /squirrels/{package_data → _package_data}/base_project/seeds/seed_subcategories.csv +0 -0
  105. /squirrels/{package_data → _package_data}/base_project/tmp/.gitignore +0 -0
  106. {squirrels-0.5.0rc0.dist-info → squirrels-0.5.1.dist-info}/WHEEL +0 -0
  107. {squirrels-0.5.0rc0.dist-info → squirrels-0.5.1.dist-info}/entry_points.txt +0 -0
  108. {squirrels-0.5.0rc0.dist-info → squirrels-0.5.1.dist-info}/licenses/LICENSE +0 -0
squirrels/_manifest.py CHANGED
@@ -1,11 +1,11 @@
 from functools import cached_property
-from typing import Any
+from typing import Literal, Any
 from urllib.parse import urlparse
 from sqlalchemy import Engine, create_engine
 from typing_extensions import Self
 from enum import Enum
 from pydantic import BaseModel, Field, field_validator, model_validator, ValidationInfo, ValidationError
-import yaml, time
+import yaml, time, re
 
 from . import _constants as c, _utils as u
 
@@ -16,10 +16,17 @@ class ProjectVarsConfig(BaseModel, extra="allow"):
     description: str = ""
     major_version: int
 
+    @field_validator("name")
+    @classmethod
+    def validate_name(cls, v: str) -> str:
+        if not re.fullmatch(r"[A-Za-z0-9_-]+", v):
+            raise ValueError("Project name must only contain alphanumeric characters, underscores, and dashes.")
+        return v
+
     @model_validator(mode="after")
     def finalize_label(self) -> Self:
         if self.label == "":
-            self.label = self.name
+            self.label = u.to_title_case(self.name)
         return self
 
 
@@ -39,10 +46,11 @@ class _ConfigWithNameBaseModel(BaseModel):
     name: str
 
 
-class ConnectionType(Enum):
+class ConnectionTypeEnum(Enum):
     SQLALCHEMY = "sqlalchemy"
     CONNECTORX = "connectorx"
     ADBC = "adbc"
+    DUCKDB = "duckdb"
 
 
 class ConnectionProperties(BaseModel):
@@ -54,7 +62,7 @@ class ConnectionProperties(BaseModel):
         uri: The URI for the connection
     """
     label: str | None = None
-    type: ConnectionType = Field(default=ConnectionType.SQLALCHEMY)
+    type: ConnectionTypeEnum = Field(default=ConnectionTypeEnum.SQLALCHEMY)
     uri: str
     sa_create_engine_args: dict[str, Any] = Field(default_factory=dict)
 
@@ -64,33 +72,39 @@ class ConnectionProperties(BaseModel):
         Creates and caches a SQLAlchemy engine if the connection type is sqlalchemy.
         Returns None for other connection types.
         """
-        if self.type == ConnectionType.SQLALCHEMY:
+        if self.type == ConnectionTypeEnum.SQLALCHEMY:
             return create_engine(self.uri, **self.sa_create_engine_args)
         else:
             raise ValueError(f'Connection type "{self.type}" does not support engine property')
 
     @cached_property
     def dialect(self) -> str:
-        if self.type == ConnectionType.SQLALCHEMY:
+        default_dialect = None
+        if self.type == ConnectionTypeEnum.SQLALCHEMY:
             dialect = self.engine.dialect.name
+        elif self.type == ConnectionTypeEnum.DUCKDB:
+            dialect = self.uri.split(':')[0]
+            default_dialect = 'duckdb'
         else:
             url = urlparse(self.uri)
             dialect = url.scheme
 
-        processed_dialect = next((d for d in ['sqlite', 'postgres', 'mysql'] if dialect.lower().startswith(d)), None)
+        processed_dialect = next((d for d in ['sqlite', 'postgres', 'mysql', 'duckdb'] if dialect.lower().startswith(d)), default_dialect)
         dialect = processed_dialect if processed_dialect is not None else dialect
         return dialect
 
     @cached_property
     def attach_uri_for_duckdb(self) -> str | None:
-        if self.type == ConnectionType.SQLALCHEMY:
+        if self.type == ConnectionTypeEnum.DUCKDB:
+            return self.uri
+        elif self.type == ConnectionTypeEnum.SQLALCHEMY:
             url = self.engine.url
             host = url.host
             port = url.port
             username = url.username
             password = url.password
             database = url.database
-            sqlite_database = database if database is not None else ""
+            database_as_file = database if database is not None else ""
         else:
             url = urlparse(self.uri)
             host = url.hostname
@@ -98,14 +112,18 @@ class ConnectionProperties(BaseModel):
             username = url.username
             password = url.password
             database = url.path.lstrip('/')
-            sqlite_database = self.uri.replace(f"{self.dialect}://", "")
+            database_as_file = self.uri.replace(f"{self.dialect}://", "")
 
-        if self.dialect == 'sqlite':
-            return sqlite_database
-        elif self.dialect in ('postgres', 'mysql'):
-            return f"dbname={database} user={username} password={password} host={host} port={port}"
+        if self.dialect in ('postgres', 'mysql'):
+            attach_uri = f"{self.dialect}:dbname={database} user={username} password={password} host={host} port={port}"
+        elif self.dialect == "sqlite":
+            attach_uri = f"{self.dialect}:{database_as_file}"
+        elif self.dialect == "duckdb":
+            attach_uri = database_as_file
        else:
-            return None
+            attach_uri = None
+
+        return attach_uri
 
 
 class DbConnConfig(ConnectionProperties, _ConfigWithNameBaseModel):
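
The attach_uri_for_duckdb change above moves the dialect into the URI itself (a "postgres:" or "sqlite:" prefix) instead of returning a bare connection string. The sketch below is an illustrative standalone reduction of that mapping, not package code, and the example URIs are invented:

# Illustrative reduction of the new attach-URI mapping above; example URIs are hypothetical
from urllib.parse import urlparse

def attach_uri_for_duckdb(dialect: str, uri: str) -> str | None:
    url = urlparse(uri)
    if dialect in ("postgres", "mysql"):
        database = url.path.lstrip("/")
        return f"{dialect}:dbname={database} user={url.username} password={url.password} host={url.hostname} port={url.port}"
    elif dialect == "sqlite":
        # strip the "sqlite://" scheme so only the file path remains
        return f"sqlite:{uri.replace('sqlite://', '')}"
    elif dialect == "duckdb":
        return uri  # DuckDB URIs are passed through unchanged
    return None

print(attach_uri_for_duckdb("postgres", "postgresql://user:pw@localhost:5432/mydb"))
# postgres:dbname=mydb user=user password=pw host=localhost port=5432
print(attach_uri_for_duckdb("sqlite", "sqlite:///path/to/local.db"))
# sqlite:/path/to/local.db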
@@ -114,6 +132,16 @@ class DbConnConfig(ConnectionProperties, _ConfigWithNameBaseModel):
         return self
 
 
+class DatasetConfigurablesConfig(BaseModel):
+    name: str
+    default: str
+
+
+class ConfigurablesConfig(DatasetConfigurablesConfig):
+    label: str = ""
+    description: str = ""
+
+
 class ParametersConfig(BaseModel):
     type: str
     factory: str
@@ -126,6 +154,20 @@ class PermissionScope(Enum):
     PRIVATE = 2
 
 
+class AuthenticationEnforcement(Enum):
+    REQUIRED = "required"
+    OPTIONAL = "optional"
+    DISABLED = "disabled"
+
+class AuthenticationType(Enum):
+    MANAGED = "managed"
+    EXTERNAL = "external"
+
+class AuthenticationConfig(BaseModel):
+    enforcement: AuthenticationEnforcement = AuthenticationEnforcement.OPTIONAL
+    type: AuthenticationType = AuthenticationType.MANAGED
+
+
 class AnalyticsOutputConfig(_ConfigWithNameBaseModel):
     label: str = ""
     description: str = ""
@@ -149,14 +191,9 @@ class AnalyticsOutputConfig(_ConfigWithNameBaseModel):
             raise ValueError(f'Scope "{value}" is invalid for dataset/dashboard "{name}". Scope must be one of {scope_list}') from e
 
 
-class DatasetTraitConfig(_ConfigWithNameBaseModel):
-    default: Any
-
-
 class DatasetConfig(AnalyticsOutputConfig):
     model: str = ""
-    traits: dict = Field(default_factory=dict)
-    default_test_set: str = ""
+    configurables: list[DatasetConfigurablesConfig] = Field(default_factory=list)
 
     def __hash__(self) -> int:
         return hash("dataset_"+self.name)
@@ -168,23 +205,24 @@ class DatasetConfig(AnalyticsOutputConfig):
         return self
 
 
+class TestSetsUserConfig(BaseModel):
+    access_level: Literal["admin", "member", "guest"] = "guest"
+    custom_fields: dict[str, Any] = Field(default_factory=dict)
+
 class TestSetsConfig(_ConfigWithNameBaseModel):
-    datasets: list[str] | None = None
-    user_attributes: dict[str, Any] | None = None
+    user: TestSetsUserConfig = Field(default_factory=TestSetsUserConfig)
     parameters: dict[str, Any] = Field(default_factory=dict)
-
-    @property
-    def is_authenticated(self) -> bool:
-        return self.user_attributes is not None
+    configurables: dict[str, Any] = Field(default_factory=dict)
 
 
 class ManifestConfig(BaseModel):
     project_variables: ProjectVarsConfig
+    authentication: AuthenticationConfig = Field(default_factory=AuthenticationConfig)
     packages: list[PackageConfig] = Field(default_factory=list)
     connections: dict[str, DbConnConfig] = Field(default_factory=dict)
     parameters: list[ParametersConfig] = Field(default_factory=list)
+    configurables: dict[str, ConfigurablesConfig] = Field(default_factory=dict)
     selection_test_sets: dict[str, TestSetsConfig] = Field(default_factory=dict)
-    dataset_traits: dict[str, DatasetTraitConfig] = Field(default_factory=dict)
     datasets: dict[str, DatasetConfig] = Field(default_factory=dict)
     base_path: str = "."
     env_vars: dict[str, str] = Field(default_factory=dict)
@@ -199,13 +237,13 @@ class ManifestConfig(BaseModel):
             set_of_directories.add(package.directory)
         return packages
 
-    @field_validator("connections", "selection_test_sets", "dataset_traits", "datasets", mode="before")
+    @field_validator("connections", "selection_test_sets", "datasets", "configurables", mode="before")
     @classmethod
     def names_are_unique(cls, values: list[dict] | dict[str, dict], info: ValidationInfo) -> dict[str, dict]:
         if isinstance(values, list):
             values_as_dict = {}
             for obj in values:
-                name = obj["name"]
+                name = u.normalize_name(obj["name"])
                 if name in values_as_dict:
                     raise ValueError(f'In the {info.field_name} section, the name "{name}" was specified multiple times')
                 values_as_dict[name] = obj
@@ -220,45 +258,62 @@ class ManifestConfig(BaseModel):
         return self
 
     @model_validator(mode="after")
-    def validate_dataset_traits(self) -> Self:
-        for dataset_name, dataset in self.datasets.items():
-            # Validate that all trait keys in dataset.traits exist in dataset_traits
-            for trait_key in dataset.traits.keys():
-                if trait_key not in self.dataset_traits:
+    def validate_authentication_and_scopes(self) -> Self:
+        """
+        Enforce authentication rules:
+        - If authentication.is_required, no dataset may be PUBLIC.
+        """
+        if self.authentication.enforcement == AuthenticationEnforcement.REQUIRED:
+            invalid = [name for name, ds in self.datasets.items() if ds.scope == PermissionScope.PUBLIC]
+            if invalid:
+                raise ValueError(
+                    "Authentication is required, so datasets cannot be public. "
+                    f"Update the scope for datasets: {invalid}"
+                )
+        return self
+
+    @model_validator(mode="after")
+    def validate_dataset_configurables(self) -> Self:
+        """
+        Validate that dataset configurables reference valid project-level configurables.
+        """
+        for dataset_name, dataset_cfg in self.datasets.items():
+            for cfg_override in dataset_cfg.configurables:
+                if cfg_override.name not in self.configurables:
                     raise ValueError(
-                        f'Dataset "{dataset_name}" references undefined trait "{trait_key}". '
-                        f'Traits must be defined with a default value in the dataset_traits section.'
+                        f'Dataset "{dataset_name}" references configurable "{cfg_override.name}" which is not defined '
+                        f'in the project configurables'
                     )
-
-            # Set default values for any traits that are missing
-            for trait_name, trait_config in self.dataset_traits.items():
-                if trait_name not in dataset.traits:
-                    dataset.traits[trait_name] = trait_config.default
-
         return self
 
-    def get_default_test_set(self, dataset_name: str) -> TestSetsConfig:
+    def get_default_test_set(self) -> TestSetsConfig:
         """
         Raises KeyError if dataset name doesn't exist
         """
-        default_name_1 = self.datasets[dataset_name].default_test_set
-        default_name_2 = self.env_vars.get(c.SQRL_TEST_SETS_DEFAULT_NAME_USED, "default")
-        default_name = default_name_1 if default_name_1 else default_name_2
-        default_test_set = self.selection_test_sets.get(default_name, TestSetsConfig(name=default_name))
+        default_default_test_set = TestSetsConfig(name=c.DEFAULT_TEST_SET_NAME)
+        default_test_set = self.selection_test_sets.get(c.DEFAULT_TEST_SET_NAME, default_default_test_set)
         return default_test_set
 
-    def get_applicable_test_sets(self, dataset: str) -> list[str]:
-        applicable_test_sets = []
-        for test_set_name, test_set_config in self.selection_test_sets.items():
-            if test_set_config.datasets is None or dataset in test_set_config.datasets:
-                applicable_test_sets.append(test_set_name)
-        return applicable_test_sets
-
-    def get_default_traits(self) -> dict[str, Any]:
-        default_traits = {}
-        for trait_name, trait_config in self.dataset_traits.items():
-            default_traits[trait_name] = trait_config.default
-        return default_traits
+    def get_default_configurables(self, dataset_name: str | None = None) -> dict[str, str]:
+        """
+        Return a dictionary of configurable name to its default value.
+
+        If dataset_name is provided, merges project-level defaults with dataset-specific overrides.
+
+        Supports both list- and dict-shaped internal storage for configurables.
+        """
+        defaults: dict[str, str] = {}
+        for name, cfg in self.configurables.items():
+            defaults[name] = str(cfg.default)
+
+        # Apply dataset-specific overrides if dataset_name is provided
+        if dataset_name is not None:
+            dataset_cfg = self.datasets.get(dataset_name)
+            if dataset_cfg:
+                for cfg_override in dataset_cfg.configurables:
+                    defaults[cfg_override.name] = cfg_override.default
+
+        return defaults
 
 
 class ManifestIO:
@@ -269,7 +324,19 @@ class ManifestIO:
 
         raw_content = u.read_file(u.Path(base_path, c.MANIFEST_FILE))
         content = u.render_string(raw_content, base_path=base_path, env_vars=env_vars)
-        manifest_content = yaml.safe_load(content)
+        manifest_content: dict[str, Any] = yaml.safe_load(content)
+
+        auth_cfg: dict[str, Any] = manifest_content.get("authentication", {})
+        is_auth_required = bool(auth_cfg.get("is_required", False))
+
+        if is_auth_required:
+            # If authentication is required, assume PROTECTED when scope is not specified
+            # while explicitly forbidding PUBLIC (enforced in model validator)
+            datasets_raw = manifest_content.get("datasets", [])
+            for ds in datasets_raw:
+                if isinstance(ds, dict) and "scope" not in ds:
+                    ds["scope"] = "protected"
+
         try:
             manifest_cfg = ManifestConfig(base_path=base_path, **manifest_content)
         except ValidationError as e:
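
The dataset_traits mechanism above is replaced by configurables: project-level defaults with string values that datasets can override by name. A simplified, self-contained sketch of the merge performed by get_default_configurables follows; the configurable names and values are invented for illustration:

# Simplified sketch of the configurables default-merging shown above; names/values are hypothetical
from pydantic import BaseModel, Field

class DatasetConfigurablesConfig(BaseModel):
    name: str
    default: str

class ConfigurablesConfig(DatasetConfigurablesConfig):
    label: str = ""
    description: str = ""

def get_default_configurables(
    configurables: dict[str, ConfigurablesConfig],
    dataset_overrides: list[DatasetConfigurablesConfig],
) -> dict[str, str]:
    # Start from project-level defaults, then apply dataset-specific overrides
    defaults = {name: str(cfg.default) for name, cfg in configurables.items()}
    for override in dataset_overrides:
        defaults[override.name] = override.default
    return defaults

project_cfg = {
    "currency": ConfigurablesConfig(name="currency", default="USD"),
    "row_limit": ConfigurablesConfig(name="row_limit", default="1000"),
}
dataset_cfg = [DatasetConfigurablesConfig(name="row_limit", default="50")]
print(get_default_configurables(project_cfg, dataset_cfg))
# {'currency': 'USD', 'row_limit': '50'}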

squirrels/_model_builder.py CHANGED
@@ -1,31 +1,26 @@
 from dataclasses import dataclass, field
-import asyncio, shutil, duckdb, time
+import duckdb, time
 
 from . import _utils as u, _connection_set as cs, _models as m
-from ._exceptions import InvalidInputError
 
 
 @dataclass
 class ModelBuilder:
-    _duckdb_venv_path: str
+    _datalake_db_path: str
     _conn_set: cs.ConnectionSet
     _static_models: dict[str, m.StaticModel]
     _conn_args: cs.ConnectionsArgs = field(default_factory=lambda: cs.ConnectionsArgs(".", {}, {}))
     _logger: u.Logger = field(default_factory=lambda: u.Logger(""))
 
-    def _attach_connections(self, duckdb_conn: duckdb.DuckDBPyConnection) -> dict[str, str]:
-        dialect_by_conn_name: dict[str, str] = {}
+    def _attach_connections(self, duckdb_conn: duckdb.DuckDBPyConnection) -> None:
         for conn_name, conn_props in self._conn_set.get_connections_as_dict().items():
             if not isinstance(conn_props, m.ConnectionProperties):
                 continue
-            dialect = conn_props.dialect
             attach_uri = conn_props.attach_uri_for_duckdb
             if attach_uri is None:
                 continue # skip unsupported dialects
-            attach_stmt = f"ATTACH IF NOT EXISTS '{attach_uri}' AS db_{conn_name} (TYPE {dialect}, READ_ONLY)"
+            attach_stmt = f"ATTACH IF NOT EXISTS '{attach_uri}' AS db_{conn_name} (READ_ONLY)"
             u.run_duckdb_stmt(self._logger, duckdb_conn, attach_stmt, redacted_values=[attach_uri])
-            dialect_by_conn_name[conn_name] = dialect
-        return dialect_by_conn_name
 
     async def _build_models(self, duckdb_conn: duckdb.DuckDBPyConnection, select: str | None, full_refresh: bool) -> None:
         """
@@ -50,62 +45,25 @@ class ModelBuilder:
         coroutines = []
         for model_name in terminal_nodes:
             model = self._static_models[model_name]
+            # await model.build_model(duckdb_conn, full_refresh)
             coro = model.build_model(duckdb_conn, full_refresh)
             coroutines.append(coro)
         await u.asyncio_gather(coroutines)
 
-    async def build(self, full_refresh: bool, select: str | None, stage_file: bool) -> None:
+    async def build(self, full_refresh: bool, select: str | None) -> None:
         start = time.time()
 
-        # Create target folder if it doesn't exist
-        duckdb_path = u.Path(self._duckdb_venv_path)
-        duckdb_path.parent.mkdir(parents=True, exist_ok=True)
-
-        # Delete any existing DuckDB file if full refresh is requested
-        duckdb_dev_path = u.Path(self._duckdb_venv_path + ".dev")
-        duckdb_stg_path = u.Path(self._duckdb_venv_path + ".stg")
-
-        # If the development copy is already in use, a concurrent build is not allowed
-        duckdb_dev_lock_path = u.Path(self._duckdb_venv_path + ".dev.lock")
-        if duckdb_dev_lock_path.exists():
-            raise InvalidInputError(60, "An existing build process is already running and a concurrent build is not allowed")
-        duckdb_dev_lock_path.touch(exist_ok=False)
-
-        # Ensure the lock file is deleted even if an exception is raised
-        try:
-            # If not full refresh, create a development copy of the existing virtual data environment
-            if not full_refresh:
-                if duckdb_stg_path.exists():
-                    duckdb_stg_path.replace(duckdb_dev_path)
-                elif duckdb_path.exists():
-                    shutil.copy(duckdb_path, duckdb_dev_path)
-
-            self._logger.log_activity_time("creating development copy of virtual data environment", start)
-
-            # Connect to DuckDB file
-            duckdb_conn = u.create_duckdb_connection(duckdb_dev_path)
-
-        except Exception:
-            duckdb_dev_lock_path.unlink()
-            raise
+        # Connect directly to DuckLake instead of attaching (supports concurrent connections)
+        duckdb_conn = u.create_duckdb_connection(self._datalake_db_path)
 
-        # Sometimes code after conn.close() doesn't run (as if the python process is killed but no error is raised)
-        # Using a new try block to ensure the lock file is removed before closing the connection
         try:
             # Attach connections
             self._attach_connections(duckdb_conn)
 
             # Construct build models
             await self._build_models(duckdb_conn, select, full_refresh)
-
+
         finally:
-            duckdb_dev_lock_path.unlink()
            duckdb_conn.close()
 
-        # Rename duckdb_dev_path to duckdb_path (or duckdb_stg_path if stage_file is True)
-        if stage_file:
-            duckdb_dev_path.replace(duckdb_stg_path)
-        else:
-            duckdb_dev_path.replace(duckdb_path)
-
-        self._logger.log_activity_time("TOTAL TIME to build virtual data environment", start)
+        self._logger.log_activity_time("TOTAL TIME to build the Virtual Data Lake (VDL)", start)

squirrels/_model_configs.py CHANGED
@@ -47,7 +47,7 @@ class QueryModelConfig(ModelConfig):
 
 
 class BuildModelConfig(QueryModelConfig):
-    materialization: str = Field(default="TABLE", description="The materialization of the model (ignored if Python model which is always a table)")
+    materialization: str = Field(default="VIEW", description="The materialization of the model (ignored if Python model which is always a table)")
 
     def get_sql_for_build(self, model_name: str, select_query: str) -> str:
         if self.materialization.upper() == "TABLE":
@@ -57,7 +57,7 @@ class BuildModelConfig(QueryModelConfig):
         else:
             raise ValueError(f"Invalid materialization: {self.materialization}")
 
-        create_prefix = f"CREATE OR REPLACE {materialization} {model_name} AS\n"
+        create_prefix = f"CREATE OR REPLACE {materialization} {model_name} AS\n\n"
         return create_prefix + select_query
 
 
@@ -70,5 +70,5 @@ class FederateModelConfig(QueryModelConfig):
 
     def get_sql_for_create(self, model_name: str, select_query: str) -> str:
         materialization = "TABLE" if self.eager else "VIEW"
-        create_prefix = f"CREATE {materialization} {model_name} AS\n"
+        create_prefix = f"CREATE {materialization} {model_name} AS\n\n"
         return create_prefix + select_query
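
Net effect of the BuildModelConfig change: build models now default to VIEW materialization, and the generated DDL gains a blank line after AS. A hedged sketch of the resulting SQL (the VIEW branch between the two hunks is not shown in the diff, so its handling is assumed; the model name and query are invented):

# Hedged sketch; the VIEW handling elided between the hunks above is assumed
def get_sql_for_build(materialization: str, model_name: str, select_query: str) -> str:
    if materialization.upper() not in ("TABLE", "VIEW"):
        raise ValueError(f"Invalid materialization: {materialization}")
    return f"CREATE OR REPLACE {materialization.upper()} {model_name} AS\n\n" + select_query

print(get_sql_for_build("VIEW", "build_example", "SELECT 42 AS answer"))
# CREATE OR REPLACE VIEW build_example AS
#
# SELECT 42 AS answer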

squirrels/_model_queries.py CHANGED
@@ -3,7 +3,7 @@ from dataclasses import dataclass, field
 from typing import Callable, Generic, TypeVar, Any
 import polars as pl, pandas as pd
 
-from .arguments.run_time_args import BuildModelArgs
+from ._arguments.run_time_args import BuildModelArgs
 from ._model_configs import ModelConfig
 