squirrels 0.5.0b2__py3-none-any.whl → 0.5.0b3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dateutils/__init__.py +6 -460
- dateutils/_enums.py +25 -0
- dateutils/_implementation.py +409 -0
- dateutils/types.py +6 -0
- squirrels/__init__.py +7 -13
- squirrels/_api_server.py +5 -5
- squirrels/{arguments/init_time_args.py → _arguments/_init_time_args.py} +2 -2
- squirrels/{arguments/run_time_args.py → _arguments/_run_time_args.py} +4 -26
- squirrels/_auth.py +2 -2
- squirrels/_connection_set.py +5 -5
- squirrels/_constants.py +1 -1
- squirrels/_dashboard_types.py +82 -0
- squirrels/_dashboards_io.py +2 -2
- squirrels/_data_sources.py +564 -0
- squirrels/_exceptions.py +1 -1
- squirrels/_initializer.py +31 -26
- squirrels/_manifest.py +5 -5
- squirrels/_model_configs.py +2 -2
- squirrels/_model_queries.py +1 -1
- squirrels/_models.py +28 -16
- squirrels/{package_data → _package_data}/base_project/dashboards/dashboard_example.py +4 -4
- squirrels/{package_data → _package_data}/base_project/dashboards/dashboard_example.yml +2 -2
- squirrels/_package_data/base_project/macros/macros_example.sql +17 -0
- squirrels/{package_data → _package_data}/base_project/models/builds/build_example.py +2 -2
- squirrels/{package_data → _package_data}/base_project/models/builds/build_example.sql +1 -1
- squirrels/{package_data → _package_data}/base_project/models/dbviews/dbview_example.sql +1 -1
- squirrels/_package_data/base_project/models/federates/federate_example.py +41 -0
- squirrels/_package_data/base_project/models/federates/federate_example.sql +25 -0
- squirrels/{package_data → _package_data}/base_project/models/federates/federate_example.yml +6 -6
- squirrels/{package_data → _package_data}/base_project/parameters.yml +9 -8
- squirrels/_package_data/base_project/pyconfigs/connections.py +14 -0
- squirrels/{package_data → _package_data}/base_project/pyconfigs/context.py +14 -16
- squirrels/{package_data → _package_data}/base_project/pyconfigs/parameters.py +13 -8
- squirrels/{package_data → _package_data}/base_project/pyconfigs/user.py +2 -2
- squirrels/_parameter_configs.py +34 -34
- squirrels/_parameter_options.py +348 -0
- squirrels/_parameter_sets.py +18 -18
- squirrels/_parameters.py +1266 -0
- squirrels/_project.py +37 -12
- squirrels/_utils.py +4 -2
- squirrels/arguments.py +2 -0
- squirrels/connections.py +1 -0
- squirrels/dashboards.py +1 -82
- squirrels/data_sources.py +8 -563
- squirrels/parameter_options.py +8 -348
- squirrels/parameters.py +9 -1266
- squirrels/types.py +11 -0
- {squirrels-0.5.0b2.dist-info → squirrels-0.5.0b3.dist-info}/METADATA +1 -1
- squirrels-0.5.0b3.dist-info/RECORD +80 -0
- squirrels/package_data/base_project/macros/macros_example.sql +0 -15
- squirrels/package_data/base_project/models/federates/federate_example.py +0 -44
- squirrels/package_data/base_project/models/federates/federate_example.sql +0 -17
- squirrels/package_data/base_project/pyconfigs/connections.py +0 -14
- squirrels-0.5.0b2.dist-info/RECORD +0 -70
- /squirrels/{dataset_result.py → _dataset_types.py} +0 -0
- /squirrels/{package_data → _package_data}/base_project/.env +0 -0
- /squirrels/{package_data → _package_data}/base_project/.env.example +0 -0
- /squirrels/{package_data → _package_data}/base_project/assets/expenses.db +0 -0
- /squirrels/{package_data → _package_data}/base_project/assets/weather.db +0 -0
- /squirrels/{package_data → _package_data}/base_project/connections.yml +0 -0
- /squirrels/{package_data → _package_data}/base_project/docker/.dockerignore +0 -0
- /squirrels/{package_data → _package_data}/base_project/docker/Dockerfile +0 -0
- /squirrels/{package_data → _package_data}/base_project/docker/compose.yml +0 -0
- /squirrels/{package_data → _package_data}/base_project/duckdb_init.sql +0 -0
- /squirrels/{package_data/base_project/.gitignore → _package_data/base_project/gitignore} +0 -0
- /squirrels/{package_data → _package_data}/base_project/models/builds/build_example.yml +0 -0
- /squirrels/{package_data → _package_data}/base_project/models/dbviews/dbview_example.yml +0 -0
- /squirrels/{package_data → _package_data}/base_project/models/sources.yml +0 -0
- /squirrels/{package_data → _package_data}/base_project/seeds/seed_categories.csv +0 -0
- /squirrels/{package_data → _package_data}/base_project/seeds/seed_categories.yml +0 -0
- /squirrels/{package_data → _package_data}/base_project/seeds/seed_subcategories.csv +0 -0
- /squirrels/{package_data → _package_data}/base_project/seeds/seed_subcategories.yml +0 -0
- /squirrels/{package_data → _package_data}/base_project/squirrels.yml.j2 +0 -0
- /squirrels/{package_data → _package_data}/base_project/tmp/.gitignore +0 -0
- {squirrels-0.5.0b2.dist-info → squirrels-0.5.0b3.dist-info}/WHEEL +0 -0
- {squirrels-0.5.0b2.dist-info → squirrels-0.5.0b3.dist-info}/entry_points.txt +0 -0
- {squirrels-0.5.0b2.dist-info → squirrels-0.5.0b3.dist-info}/licenses/LICENSE +0 -0
squirrels/_initializer.py
CHANGED
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
from typing import Optional
|
|
2
2
|
from datetime import datetime
|
|
3
|
+
from pathlib import Path
|
|
3
4
|
import inquirer, os, shutil, secrets
|
|
4
5
|
|
|
5
6
|
from . import _constants as c, _utils as u
|
|
6
7
|
|
|
7
|
-
base_proj_dir =
|
|
8
|
+
base_proj_dir = Path(os.path.dirname(__file__), c.PACKAGE_DATA_FOLDER, c.BASE_PROJECT_FOLDER)
|
|
8
9
|
|
|
9
10
|
TMP_FOLDER = "tmp"
|
|
10
11
|
|
|
@@ -14,22 +15,23 @@ class Initializer:
|
|
|
14
15
|
self.project_name = project_name if not use_curr_dir else None
|
|
15
16
|
self.use_curr_dir = use_curr_dir
|
|
16
17
|
|
|
17
|
-
def _path_exists(self, filepath:
|
|
18
|
+
def _path_exists(self, filepath: Path) -> bool:
|
|
18
19
|
return os.path.exists(filepath)
|
|
19
20
|
|
|
20
|
-
def _files_have_same_content(self, file1:
|
|
21
|
+
def _files_have_same_content(self, file1: Path, file2: Path) -> bool:
|
|
21
22
|
with open(file1, 'rb') as f1, open(file2, 'rb') as f2:
|
|
22
23
|
return f1.read() == f2.read()
|
|
23
24
|
|
|
24
|
-
def _add_timestamp_to_filename(self, path:
|
|
25
|
+
def _add_timestamp_to_filename(self, path: Path) -> Path:
|
|
25
26
|
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
|
26
27
|
new_filename = f"{path.stem}_{timestamp}{path.suffix}"
|
|
27
28
|
return path.with_name(new_filename)
|
|
28
29
|
|
|
29
|
-
def _copy_file(self, filepath:
|
|
30
|
-
|
|
30
|
+
def _copy_file(self, filepath: Path, *, src_folder: str = "", src_file: Path | None = None):
|
|
31
|
+
src_file = src_file if src_file is not None else filepath
|
|
32
|
+
src_path = Path(base_proj_dir, src_folder, src_file)
|
|
31
33
|
|
|
32
|
-
filepath2 =
|
|
34
|
+
filepath2 = Path(self.project_name, filepath) if self.project_name else filepath
|
|
33
35
|
dest_dir = os.path.dirname(filepath2)
|
|
34
36
|
if dest_dir != "":
|
|
35
37
|
os.makedirs(dest_dir, exist_ok=True)
|
|
@@ -51,38 +53,38 @@ class Initializer:
|
|
|
51
53
|
shutil.copy(src_path, filepath2)
|
|
52
54
|
|
|
53
55
|
def _copy_macros_file(self, filepath: str):
|
|
54
|
-
self._copy_file(
|
|
56
|
+
self._copy_file(Path(c.MACROS_FOLDER, filepath))
|
|
55
57
|
|
|
56
58
|
def _copy_models_file(self, filepath: str):
|
|
57
|
-
self._copy_file(
|
|
59
|
+
self._copy_file(Path(c.MODELS_FOLDER, filepath))
|
|
58
60
|
|
|
59
61
|
def _copy_build_file(self, filepath: str):
|
|
60
|
-
self._copy_file(
|
|
62
|
+
self._copy_file(Path(c.MODELS_FOLDER, c.BUILDS_FOLDER, filepath))
|
|
61
63
|
|
|
62
64
|
def _copy_dbview_file(self, filepath: str):
|
|
63
|
-
self._copy_file(
|
|
65
|
+
self._copy_file(Path(c.MODELS_FOLDER, c.DBVIEWS_FOLDER, filepath))
|
|
64
66
|
|
|
65
67
|
def _copy_federate_file(self, filepath: str):
|
|
66
|
-
self._copy_file(
|
|
68
|
+
self._copy_file(Path(c.MODELS_FOLDER, c.FEDERATES_FOLDER, filepath))
|
|
67
69
|
|
|
68
70
|
def _copy_database_file(self, filepath: str):
|
|
69
|
-
self._copy_file(
|
|
71
|
+
self._copy_file(Path(c.DATABASE_FOLDER, filepath))
|
|
70
72
|
|
|
71
73
|
def _copy_pyconfig_file(self, filepath: str):
|
|
72
|
-
self._copy_file(
|
|
74
|
+
self._copy_file(Path(c.PYCONFIGS_FOLDER, filepath))
|
|
73
75
|
|
|
74
76
|
def _copy_seed_file(self, filepath: str):
|
|
75
|
-
self._copy_file(
|
|
77
|
+
self._copy_file(Path(c.SEEDS_FOLDER, filepath))
|
|
76
78
|
|
|
77
79
|
def _copy_dashboard_file(self, filepath: str):
|
|
78
|
-
self._copy_file(
|
|
80
|
+
self._copy_file(Path(c.DASHBOARDS_FOLDER, filepath))
|
|
79
81
|
|
|
80
82
|
def _create_manifest_file(self, has_connections: bool, has_parameters: bool):
|
|
81
83
|
def get_content(file_name: Optional[str]) -> str:
|
|
82
84
|
if file_name is None:
|
|
83
85
|
return ""
|
|
84
86
|
|
|
85
|
-
yaml_path =
|
|
87
|
+
yaml_path = Path(base_proj_dir, file_name)
|
|
86
88
|
return yaml_path.read_text()
|
|
87
89
|
|
|
88
90
|
file_name_dict = {
|
|
@@ -93,10 +95,10 @@ class Initializer:
|
|
|
93
95
|
|
|
94
96
|
manifest_template = get_content(c.MANIFEST_JINJA_FILE)
|
|
95
97
|
manifest_content = u.render_string(manifest_template, **substitutions)
|
|
96
|
-
output_path =
|
|
98
|
+
output_path = Path(base_proj_dir, TMP_FOLDER, c.MANIFEST_FILE)
|
|
97
99
|
output_path.write_text(manifest_content)
|
|
98
100
|
|
|
99
|
-
self._copy_file(
|
|
101
|
+
self._copy_file(Path(c.MANIFEST_FILE), src_folder=TMP_FOLDER)
|
|
100
102
|
|
|
101
103
|
def _copy_dotenv_files(self, admin_password: str | None = None):
|
|
102
104
|
substitutions = {
|
|
@@ -104,14 +106,17 @@ class Initializer:
|
|
|
104
106
|
"random_admin_password": admin_password if admin_password else secrets.token_urlsafe(8),
|
|
105
107
|
}
|
|
106
108
|
|
|
107
|
-
dotenv_path =
|
|
109
|
+
dotenv_path = Path(base_proj_dir, c.DOTENV_FILE)
|
|
108
110
|
contents = u.render_string(dotenv_path.read_text(), **substitutions)
|
|
109
111
|
|
|
110
|
-
output_path =
|
|
112
|
+
output_path = Path(base_proj_dir, TMP_FOLDER, c.DOTENV_FILE)
|
|
111
113
|
output_path.write_text(contents)
|
|
112
114
|
|
|
113
|
-
self._copy_file(
|
|
114
|
-
self._copy_file(
|
|
115
|
+
self._copy_file(Path(c.DOTENV_FILE), src_folder=TMP_FOLDER)
|
|
116
|
+
self._copy_file(Path(c.DOTENV_FILE + ".example"))
|
|
117
|
+
|
|
118
|
+
def _copy_gitignore_file(self):
|
|
119
|
+
self._copy_file(Path(c.GITIGNORE_FILE), src_file=Path("gitignore"))
|
|
115
120
|
|
|
116
121
|
def init_project(self, args):
|
|
117
122
|
options = ["connections", "parameters", "build", "federate", "dashboard", "admin_password"]
|
|
@@ -127,7 +132,7 @@ class Initializer:
|
|
|
127
132
|
self.project_name = answers['project_name']
|
|
128
133
|
|
|
129
134
|
answers = { x: getattr(args, x) for x in options }
|
|
130
|
-
if DASHBOARD
|
|
135
|
+
if answers.get(DASHBOARD) is not None:
|
|
131
136
|
answers[DASHBOARD] = (answers[DASHBOARD] == 'y') # convert 'y' or 'n' to boolean
|
|
132
137
|
|
|
133
138
|
if not args.use_defaults:
|
|
@@ -221,7 +226,7 @@ class Initializer:
|
|
|
221
226
|
self._copy_dotenv_files(admin_password)
|
|
222
227
|
self._create_manifest_file(connections_use_yaml, parameters_use_yaml)
|
|
223
228
|
|
|
224
|
-
self.
|
|
229
|
+
self._copy_gitignore_file()
|
|
225
230
|
|
|
226
231
|
if connections_use_py:
|
|
227
232
|
self._copy_pyconfig_file(c.CONNECTIONS_FILE)
|
|
@@ -272,7 +277,7 @@ class Initializer:
|
|
|
272
277
|
print(f"You may also run `sqrl get-file {c.GITIGNORE_FILE}` to add a sample {c.GITIGNORE_FILE} file to your project.")
|
|
273
278
|
print()
|
|
274
279
|
elif args.file_name == c.GITIGNORE_FILE:
|
|
275
|
-
self.
|
|
280
|
+
self._copy_gitignore_file()
|
|
276
281
|
elif args.file_name == c.MANIFEST_FILE:
|
|
277
282
|
self._create_manifest_file(not args.no_connections, args.parameters)
|
|
278
283
|
elif args.file_name in (c.USER_FILE, c.CONNECTIONS_FILE, c.PARAMETERS_FILE, c.CONTEXT_FILE):
|
squirrels/_manifest.py
CHANGED
|
@@ -39,7 +39,7 @@ class _ConfigWithNameBaseModel(BaseModel):
|
|
|
39
39
|
name: str
|
|
40
40
|
|
|
41
41
|
|
|
42
|
-
class
|
|
42
|
+
class ConnectionTypeEnum(Enum):
|
|
43
43
|
SQLALCHEMY = "sqlalchemy"
|
|
44
44
|
CONNECTORX = "connectorx"
|
|
45
45
|
ADBC = "adbc"
|
|
@@ -54,7 +54,7 @@ class ConnectionProperties(BaseModel):
|
|
|
54
54
|
uri: The URI for the connection
|
|
55
55
|
"""
|
|
56
56
|
label: str | None = None
|
|
57
|
-
type:
|
|
57
|
+
type: ConnectionTypeEnum = Field(default=ConnectionTypeEnum.SQLALCHEMY)
|
|
58
58
|
uri: str
|
|
59
59
|
sa_create_engine_args: dict[str, Any] = Field(default_factory=dict)
|
|
60
60
|
|
|
@@ -64,14 +64,14 @@ class ConnectionProperties(BaseModel):
|
|
|
64
64
|
Creates and caches a SQLAlchemy engine if the connection type is sqlalchemy.
|
|
65
65
|
Returns None for other connection types.
|
|
66
66
|
"""
|
|
67
|
-
if self.type ==
|
|
67
|
+
if self.type == ConnectionTypeEnum.SQLALCHEMY:
|
|
68
68
|
return create_engine(self.uri, **self.sa_create_engine_args)
|
|
69
69
|
else:
|
|
70
70
|
raise ValueError(f'Connection type "{self.type}" does not support engine property')
|
|
71
71
|
|
|
72
72
|
@cached_property
|
|
73
73
|
def dialect(self) -> str:
|
|
74
|
-
if self.type ==
|
|
74
|
+
if self.type == ConnectionTypeEnum.SQLALCHEMY:
|
|
75
75
|
dialect = self.engine.dialect.name
|
|
76
76
|
else:
|
|
77
77
|
url = urlparse(self.uri)
|
|
@@ -83,7 +83,7 @@ class ConnectionProperties(BaseModel):
|
|
|
83
83
|
|
|
84
84
|
@cached_property
|
|
85
85
|
def attach_uri_for_duckdb(self) -> str | None:
|
|
86
|
-
if self.type ==
|
|
86
|
+
if self.type == ConnectionTypeEnum.SQLALCHEMY:
|
|
87
87
|
url = self.engine.url
|
|
88
88
|
host = url.host
|
|
89
89
|
port = url.port
|
squirrels/_model_configs.py
CHANGED
|
@@ -57,7 +57,7 @@ class BuildModelConfig(QueryModelConfig):
|
|
|
57
57
|
else:
|
|
58
58
|
raise ValueError(f"Invalid materialization: {self.materialization}")
|
|
59
59
|
|
|
60
|
-
create_prefix = f"CREATE OR REPLACE {materialization} {model_name} AS\n"
|
|
60
|
+
create_prefix = f"CREATE OR REPLACE {materialization} {model_name} AS\n\n"
|
|
61
61
|
return create_prefix + select_query
|
|
62
62
|
|
|
63
63
|
|
|
@@ -70,5 +70,5 @@ class FederateModelConfig(QueryModelConfig):
|
|
|
70
70
|
|
|
71
71
|
def get_sql_for_create(self, model_name: str, select_query: str) -> str:
|
|
72
72
|
materialization = "TABLE" if self.eager else "VIEW"
|
|
73
|
-
create_prefix = f"CREATE {materialization} {model_name} AS\n"
|
|
73
|
+
create_prefix = f"CREATE {materialization} {model_name} AS\n\n"
|
|
74
74
|
return create_prefix + select_query
|
squirrels/_model_queries.py
CHANGED
|
@@ -3,7 +3,7 @@ from dataclasses import dataclass, field
|
|
|
3
3
|
from typing import Callable, Generic, TypeVar, Any
|
|
4
4
|
import polars as pl, pandas as pd
|
|
5
5
|
|
|
6
|
-
from .
|
|
6
|
+
from ._arguments._run_time_args import BuildModelArgs
|
|
7
7
|
from ._model_configs import ModelConfig
|
|
8
8
|
|
|
9
9
|
|
squirrels/_models.py
CHANGED
|
@@ -9,7 +9,7 @@ import polars as pl, pandas as pd, networkx as nx
|
|
|
9
9
|
|
|
10
10
|
from . import _constants as c, _utils as u, _py_module as pm, _model_queries as mq, _model_configs as mc, _sources as src, _api_response_models as arm
|
|
11
11
|
from ._exceptions import FileExecutionError, InvalidInputError
|
|
12
|
-
from .
|
|
12
|
+
from ._arguments._run_time_args import ContextArgs, ModelArgs, BuildModelArgs
|
|
13
13
|
from ._auth import BaseUser
|
|
14
14
|
from ._connection_set import ConnectionsArgs, ConnectionSet, ConnectionProperties
|
|
15
15
|
from ._manifest import DatasetConfig
|
|
@@ -253,8 +253,12 @@ class SourceModel(StaticModel):
|
|
|
253
253
|
connection_props = self.conn_set.get_connection(conn_name)
|
|
254
254
|
if isinstance(connection_props, ConnectionProperties):
|
|
255
255
|
dialect = connection_props.dialect
|
|
256
|
+
attach_uri = connection_props.attach_uri_for_duckdb
|
|
256
257
|
else:
|
|
257
|
-
raise u.ConfigurationError(f'Unable to use connection "{conn_name}" for source "{self.name}"')
|
|
258
|
+
raise u.ConfigurationError(f'Unable to use connection "{conn_name}" for source "{self.name}". Connection "{conn_name}" must be a ConnectionProperties object')
|
|
259
|
+
|
|
260
|
+
if attach_uri is None:
|
|
261
|
+
raise u.ConfigurationError(f'Loading to duckdb is not supported for source "{self.name}" since its connection "{conn_name}" uses an unsupported dialect')
|
|
258
262
|
|
|
259
263
|
result = u.run_duckdb_stmt(self.logger, local_conn, f"FROM (SHOW DATABASES) WHERE database_name = 'db_{conn_name}'").fetchone()
|
|
260
264
|
if result is None:
|
|
@@ -351,7 +355,7 @@ class QueryModel(DataModel):
|
|
|
351
355
|
def _get_compile_sql_model_args_from_ctx_args(
|
|
352
356
|
self, ctx: dict[str, Any], ctx_args: ContextArgs
|
|
353
357
|
) -> dict[str, Any]:
|
|
354
|
-
is_placeholder = lambda placeholder: placeholder in ctx_args.
|
|
358
|
+
is_placeholder = lambda placeholder: placeholder in ctx_args._placeholders_copy
|
|
355
359
|
kwargs = {
|
|
356
360
|
"proj_vars": ctx_args.proj_vars, "env_vars": ctx_args.env_vars, "user": ctx_args.user, "prms": ctx_args.prms,
|
|
357
361
|
"traits": ctx_args.traits, "ctx": ctx, "is_placeholder": is_placeholder, "set_placeholder": ctx_args.set_placeholder,
|
|
@@ -424,6 +428,11 @@ class QueryModel(DataModel):
|
|
|
424
428
|
dependent_model_names.add(self.name)
|
|
425
429
|
for dep_model in self.upstreams.values():
|
|
426
430
|
dep_model.retrieve_dependent_query_models(dependent_model_names)
|
|
431
|
+
|
|
432
|
+
def _log_sql_to_run(self, sql: str, placeholders: dict[str, Any]) -> None:
|
|
433
|
+
log_msg = f"SQL to run for model '{self.name}':\n{sql}"
|
|
434
|
+
log_msg += f"\n\n(with placeholders: {placeholders})"
|
|
435
|
+
self.logger.info(log_msg)
|
|
427
436
|
|
|
428
437
|
|
|
429
438
|
@dataclass
|
|
@@ -460,6 +469,7 @@ class DbviewModel(QueryModel):
|
|
|
460
469
|
return "{{ source(\"" + source_name + "\") }}"
|
|
461
470
|
|
|
462
471
|
kwargs["source"] = source
|
|
472
|
+
kwargs["ref"] = source
|
|
463
473
|
return kwargs
|
|
464
474
|
|
|
465
475
|
def _get_duckdb_query(self, read_dialect: str, query: str) -> str:
|
|
@@ -476,17 +486,15 @@ class DbviewModel(QueryModel):
|
|
|
476
486
|
connection_props = self.conn_set.get_connection(connection_name)
|
|
477
487
|
|
|
478
488
|
if self.model_config.translate_to_duckdb and isinstance(connection_props, ConnectionProperties):
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
"source": source_func, "ref": source_func
|
|
489
|
+
macros = {
|
|
490
|
+
"source": lambda source_name: "venv." + source_name
|
|
482
491
|
}
|
|
483
492
|
compiled_query2 = self._get_compiled_sql_query_str(compiled_query_str, macros)
|
|
484
493
|
compiled_query_str = self._get_duckdb_query(connection_props.dialect, compiled_query2)
|
|
485
494
|
is_duckdb = True
|
|
486
495
|
else:
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
"source": source_func, "ref": source_func
|
|
496
|
+
macros = {
|
|
497
|
+
"source": lambda source_name: self.sources[source_name].get_table()
|
|
490
498
|
}
|
|
491
499
|
compiled_query_str = self._get_compiled_sql_query_str(compiled_query_str, macros)
|
|
492
500
|
is_duckdb = False
|
|
@@ -520,7 +528,7 @@ class DbviewModel(QueryModel):
|
|
|
520
528
|
if is_duckdb:
|
|
521
529
|
local_conn = conn.cursor()
|
|
522
530
|
try:
|
|
523
|
-
self.logger.info(f"Running
|
|
531
|
+
self.logger.info(f"Running dbview '{self.name}' on duckdb")
|
|
524
532
|
return local_conn.sql(query, params=placeholders).pl()
|
|
525
533
|
except duckdb.CatalogException as e:
|
|
526
534
|
raise InvalidInputError(61, f'Model "{self.name}" depends on static data models that cannot be found.')
|
|
@@ -529,10 +537,12 @@ class DbviewModel(QueryModel):
|
|
|
529
537
|
finally:
|
|
530
538
|
local_conn.close()
|
|
531
539
|
else:
|
|
532
|
-
|
|
540
|
+
self.logger.info(f"Running dbview '{self.name}' on connection: {connection_name}")
|
|
541
|
+
return self.conn_set.run_sql_query_from_conn_name(query, connection_name, placeholders)
|
|
533
542
|
except RuntimeError as e:
|
|
534
543
|
raise FileExecutionError(f'Failed to run dbview sql model "{self.name}"', e)
|
|
535
544
|
|
|
545
|
+
self._log_sql_to_run(query, placeholders)
|
|
536
546
|
result = await asyncio.to_thread(run_sql_query_on_connection, is_duckdb, query, placeholders)
|
|
537
547
|
self.result = result.lazy()
|
|
538
548
|
|
|
@@ -582,7 +592,7 @@ class FederateModel(QueryModel):
|
|
|
582
592
|
connections = self.conn_set.get_connections_as_dict()
|
|
583
593
|
|
|
584
594
|
def run_external_sql(connection_name: str, sql_query: str) -> pl.DataFrame:
|
|
585
|
-
return self._run_sql_query_on_connection(connection_name, sql_query, ctx_args.
|
|
595
|
+
return self._run_sql_query_on_connection(connection_name, sql_query, ctx_args._placeholders_copy)
|
|
586
596
|
|
|
587
597
|
conn_args = ConnectionsArgs(ctx_args.project_path, ctx_args.proj_vars, ctx_args.env_vars)
|
|
588
598
|
build_model_args = BuildModelArgs(conn_args, connections, dependencies, self._ref_for_python, run_external_sql)
|
|
@@ -638,10 +648,12 @@ class FederateModel(QueryModel):
|
|
|
638
648
|
query = compiled_query.query
|
|
639
649
|
|
|
640
650
|
def create_table(local_conn: duckdb.DuckDBPyConnection):
|
|
641
|
-
|
|
642
|
-
|
|
651
|
+
# DuckDB doesn't support specifying named parameters that are not used in the query, so filtering them out
|
|
652
|
+
placeholder_exists = lambda key: re.search(r"\$" + key + r"(?!\w)", query)
|
|
653
|
+
existing_placeholders = {key: value for key, value in placeholders.items() if placeholder_exists(key)}
|
|
643
654
|
|
|
644
655
|
create_query = self.model_config.get_sql_for_create(self.name, query)
|
|
656
|
+
self._log_sql_to_run(create_query, existing_placeholders)
|
|
645
657
|
try:
|
|
646
658
|
return local_conn.execute(create_query, existing_placeholders)
|
|
647
659
|
except duckdb.CatalogException as e:
|
|
@@ -774,7 +786,7 @@ class BuildModel(StaticModel, QueryModel):
|
|
|
774
786
|
create_query = self.model_config.get_sql_for_build(self.name, query)
|
|
775
787
|
local_conn = conn.cursor()
|
|
776
788
|
try:
|
|
777
|
-
return u.run_duckdb_stmt(self.logger, local_conn, create_query)
|
|
789
|
+
return u.run_duckdb_stmt(self.logger, local_conn, create_query, model_name=self.name)
|
|
778
790
|
except Exception as e:
|
|
779
791
|
raise FileExecutionError(f'Failed to build static sql model "{self.name}"', e) from e
|
|
780
792
|
finally:
|
|
@@ -922,7 +934,7 @@ class DAG:
|
|
|
922
934
|
|
|
923
935
|
self._compile_models(context, ctx_args, recurse)
|
|
924
936
|
|
|
925
|
-
self.placeholders = ctx_args.
|
|
937
|
+
self.placeholders = ctx_args._placeholders_copy
|
|
926
938
|
if runquery:
|
|
927
939
|
await self._run_models()
|
|
928
940
|
|
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
from squirrels import
|
|
1
|
+
from squirrels import arguments as args, dashboards as d
|
|
2
2
|
from matplotlib import pyplot as plt, figure as f, axes as a
|
|
3
3
|
|
|
4
4
|
|
|
5
|
-
async def main(sqrl: DashboardArgs) -> d.PngDashboard:
|
|
6
|
-
spending_by_month_df = await sqrl.dataset("federate_dataset_example", fixed_parameters={"group_by": "
|
|
7
|
-
spending_by_subcategory_df = await sqrl.dataset("federate_dataset_example", fixed_parameters={"group_by": "
|
|
5
|
+
async def main(sqrl: args.DashboardArgs) -> d.PngDashboard:
|
|
6
|
+
spending_by_month_df = await sqrl.dataset("federate_dataset_example", fixed_parameters={"group_by": "month"})
|
|
7
|
+
spending_by_subcategory_df = await sqrl.dataset("federate_dataset_example", fixed_parameters={"group_by": "subcat"})
|
|
8
8
|
|
|
9
9
|
# Create a figure with two subplots
|
|
10
10
|
fig, (ax0, ax1) = plt.subplots(2, 1, figsize=(8, 8), height_ratios=(1, 2))
|
|
@@ -14,9 +14,9 @@ depends_on:
|
|
|
14
14
|
- name: dataset_example_month
|
|
15
15
|
dataset: federate_dataset_example
|
|
16
16
|
fixed_parameters:
|
|
17
|
-
- group_by:
|
|
17
|
+
- group_by: month (Month)
|
|
18
18
|
|
|
19
19
|
- name: dataset_example_subcategory
|
|
20
20
|
dataset: federate_dataset_example
|
|
21
21
|
fixed_parameters:
|
|
22
|
-
- group_by:
|
|
22
|
+
- group_by: subcat (Subcategory)
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
{%- macro date_and_amount_filters(use_from_range) -%}
|
|
2
|
+
{%- if use_from_range -%}
|
|
3
|
+
|
|
4
|
+
date >= {{ ctx.start_date_from_range | quote }}
|
|
5
|
+
AND date <= {{ ctx.end_date_from_range | quote }}
|
|
6
|
+
AND amount >= {{ ctx.min_amount_from_range }}
|
|
7
|
+
AND amount <= {{ ctx.max_amount_from_range }}
|
|
8
|
+
|
|
9
|
+
{%- else -%}
|
|
10
|
+
|
|
11
|
+
date >= {{ ctx.start_date | quote }}
|
|
12
|
+
AND date <= {{ ctx.end_date | quote }}
|
|
13
|
+
AND amount >= {{ ctx.min_amount }}
|
|
14
|
+
AND amount <= {{ ctx.max_amount }}
|
|
15
|
+
|
|
16
|
+
{%- endif -%}
|
|
17
|
+
{%- endmacro -%}
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
from squirrels import
|
|
1
|
+
from squirrels import arguments as args
|
|
2
2
|
import polars as pl, pandas as pd
|
|
3
3
|
|
|
4
4
|
|
|
5
|
-
def main(sqrl: BuildModelArgs) -> pl.LazyFrame | pl.DataFrame | pd.DataFrame:
|
|
5
|
+
def main(sqrl: args.BuildModelArgs) -> pl.LazyFrame | pl.DataFrame | pd.DataFrame:
|
|
6
6
|
"""
|
|
7
7
|
Create a build model by joining/processing sources or other build models to form a new
|
|
8
8
|
Python DataFrame (using polars LazyFrame, polars DataFrame, or pandas DataFrame).
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
from squirrels import arguments as args
|
|
2
|
+
import polars as pl, pandas as pd
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def main(sqrl: args.ModelArgs) -> pl.LazyFrame | pl.DataFrame | pd.DataFrame:
|
|
6
|
+
"""
|
|
7
|
+
Create federated models by joining/processing dependent models (sources, seeds, builds, dbviews, other federates, etc.) to
|
|
8
|
+
form a new Python DataFrame (using polars LazyFrame, polars DataFrame, or pandas DataFrame).
|
|
9
|
+
"""
|
|
10
|
+
df = sqrl.ref("build_example")
|
|
11
|
+
|
|
12
|
+
df = df.filter(
|
|
13
|
+
(pl.col("date") >= sqrl.ctx["start_date_from_range"]) &
|
|
14
|
+
(pl.col("date") <= sqrl.ctx["end_date_from_range"]) &
|
|
15
|
+
(pl.col("amount") >= sqrl.ctx["min_amount_from_range"]) &
|
|
16
|
+
(pl.col("amount") <= sqrl.ctx["max_amount_from_range"])
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
if sqrl.ctx["has_categories"]:
|
|
20
|
+
categories: list[str] = sqrl.ctx["categories"]
|
|
21
|
+
df = df.filter(pl.col("category_id").is_in(categories))
|
|
22
|
+
|
|
23
|
+
if sqrl.ctx["has_subcategories"]:
|
|
24
|
+
subcategories: list[str] = sqrl.ctx["subcategories"]
|
|
25
|
+
df = df.filter(pl.col("subcategory_id").is_in(subcategories))
|
|
26
|
+
|
|
27
|
+
dimension_cols: list[str] = sqrl.ctx["group_by_cols"]
|
|
28
|
+
df = df.group_by(dimension_cols).agg(
|
|
29
|
+
pl.sum("amount").cast(pl.Decimal(precision=15, scale=2)).alias("total_amount")
|
|
30
|
+
)
|
|
31
|
+
df = df.rename(sqrl.ctx["rename_dict"])
|
|
32
|
+
|
|
33
|
+
order_by_cols: list[str] = sqrl.ctx["order_by_cols"]
|
|
34
|
+
df = df.select(*order_by_cols, "total_amount") \
|
|
35
|
+
.sort(order_by_cols, descending=True)
|
|
36
|
+
|
|
37
|
+
if "limit" in sqrl.ctx:
|
|
38
|
+
limit: int = sqrl.ctx["limit"]
|
|
39
|
+
df = df.limit(limit)
|
|
40
|
+
|
|
41
|
+
return df
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
{#- DuckDB dialect -#}
|
|
2
|
+
|
|
3
|
+
SELECT {{ ctx.select_dim_cols | join }}
|
|
4
|
+
, CAST(SUM(amount) AS DECIMAL(15, 2)) as total_amount
|
|
5
|
+
|
|
6
|
+
{# ref() can be used on a sources, seeds, builds, dbviews, or other federate models -#}
|
|
7
|
+
FROM {{ ref("build_example") }} AS a
|
|
8
|
+
|
|
9
|
+
WHERE {{ date_and_amount_filters(use_from_range=true) }}
|
|
10
|
+
{%- if ctx.has_categories %}
|
|
11
|
+
AND category_id IN ({{ ctx.categories | quote_and_join }})
|
|
12
|
+
{%- endif %}
|
|
13
|
+
{%- if ctx.has_subcategories %}
|
|
14
|
+
AND subcategory_id IN ({{ ctx.subcategories | quote_and_join }})
|
|
15
|
+
{%- endif %}
|
|
16
|
+
|
|
17
|
+
GROUP BY {{ ctx.group_by_cols | join }}
|
|
18
|
+
|
|
19
|
+
ORDER BY {{ ctx.order_by_cols_desc | join }}
|
|
20
|
+
|
|
21
|
+
{%- if ctx.limit %}
|
|
22
|
+
|
|
23
|
+
LIMIT {{ ctx.limit }}
|
|
24
|
+
|
|
25
|
+
{%- endif %}
|
|
@@ -11,7 +11,7 @@ eager: false # optional - defaults to false. Only applies to SQL m
|
|
|
11
11
|
columns:
|
|
12
12
|
- name: date
|
|
13
13
|
type: string
|
|
14
|
-
condition: parameter 'group_by' (Group By) is '
|
|
14
|
+
condition: parameter 'group_by' (Group By) is 'trans' (Transaction)
|
|
15
15
|
description: The date of the transaction in 'YYYY-MM-DD' format, in descending order
|
|
16
16
|
category: dimension
|
|
17
17
|
depends_on:
|
|
@@ -19,7 +19,7 @@ columns:
|
|
|
19
19
|
|
|
20
20
|
- name: description
|
|
21
21
|
type: string
|
|
22
|
-
condition: parameter 'group_by' (Group By) is '
|
|
22
|
+
condition: parameter 'group_by' (Group By) is 'trans' (Transaction)
|
|
23
23
|
description: The description of the transaction
|
|
24
24
|
category: dimension
|
|
25
25
|
depends_on:
|
|
@@ -27,7 +27,7 @@ columns:
|
|
|
27
27
|
|
|
28
28
|
- name: day
|
|
29
29
|
type: string
|
|
30
|
-
condition: parameter 'group_by' (Group By) is '
|
|
30
|
+
condition: parameter 'group_by' (Group By) is 'day' (Day)
|
|
31
31
|
description: The day for which the amount is aggregated by, in descending order
|
|
32
32
|
category: dimension
|
|
33
33
|
depends_on:
|
|
@@ -35,7 +35,7 @@ columns:
|
|
|
35
35
|
|
|
36
36
|
- name: month
|
|
37
37
|
type: string
|
|
38
|
-
condition: parameter 'group_by' (Group By) is '
|
|
38
|
+
condition: parameter 'group_by' (Group By) is 'month' (Month)
|
|
39
39
|
description: The month for which the amount is aggregated by, in descending order
|
|
40
40
|
category: dimension
|
|
41
41
|
depends_on:
|
|
@@ -43,7 +43,7 @@ columns:
|
|
|
43
43
|
|
|
44
44
|
- name: category
|
|
45
45
|
type: string
|
|
46
|
-
condition: parameter `group_by` (Group By) is `
|
|
46
|
+
condition: parameter `group_by` (Group By) is `trans` (Transaction), `cat` (Category), or `subcat` (Subcategory)
|
|
47
47
|
description: The category for which the amount is aggregated by
|
|
48
48
|
category: dimension
|
|
49
49
|
depends_on:
|
|
@@ -51,7 +51,7 @@ columns:
|
|
|
51
51
|
|
|
52
52
|
- name: subcategory
|
|
53
53
|
type: string
|
|
54
|
-
condition: parameter `group_by` (Group By) is `
|
|
54
|
+
condition: parameter `group_by` (Group By) is `trans` (Transaction) or `subcat` (Subcategory)
|
|
55
55
|
description: The subcategory for which the amount is aggregated by
|
|
56
56
|
category: dimension
|
|
57
57
|
depends_on:
|
|
@@ -7,26 +7,27 @@ parameters:
|
|
|
7
7
|
description: Dimension(s) to aggregate by ## optional, default is empty string
|
|
8
8
|
user_attribute: role ## optional, default is null
|
|
9
9
|
all_options:
|
|
10
|
-
- id:
|
|
10
|
+
- id: trans
|
|
11
11
|
label: Transaction
|
|
12
|
-
columns: ["date", "category", "subcategory", "description"] ## custom field
|
|
12
|
+
columns: ["id", "date", "category", "subcategory", "description"] ## custom field
|
|
13
|
+
aliases: ["_id", "date", "category", "subcategory", "description"] ## custom field (any alias starting with "_" will not be selected - see context.py for implementation)
|
|
13
14
|
is_default: false ## optional, default, exists for SingleSelect or MultiSelect options only
|
|
14
15
|
user_groups: ["manager"] ## optional, default is empty list
|
|
15
16
|
parent_option_ids: [] ## optional, default, exists for all parameter options
|
|
16
|
-
- id:
|
|
17
|
+
- id: day
|
|
17
18
|
label: Day
|
|
18
19
|
columns: [date]
|
|
19
|
-
aliases: [day]
|
|
20
|
+
aliases: [day]
|
|
20
21
|
user_groups: ["manager", "employee"]
|
|
21
|
-
- id:
|
|
22
|
+
- id: month
|
|
22
23
|
label: Month
|
|
23
24
|
columns: [month]
|
|
24
25
|
user_groups: ["manager", "employee"]
|
|
25
|
-
- id:
|
|
26
|
+
- id: cat
|
|
26
27
|
label: Category
|
|
27
28
|
columns: [category]
|
|
28
29
|
user_groups: ["manager", "employee"]
|
|
29
|
-
- id:
|
|
30
|
+
- id: subcat
|
|
30
31
|
label: Subcategory
|
|
31
32
|
columns: [category, subcategory]
|
|
32
33
|
user_groups: ["manager", "employee"]
|
|
@@ -44,7 +45,7 @@ parameters:
|
|
|
44
45
|
max_value: 1000
|
|
45
46
|
increment: 10
|
|
46
47
|
default_value: 1000
|
|
47
|
-
parent_option_ids:
|
|
48
|
+
parent_option_ids: trans
|
|
48
49
|
|
|
49
50
|
- type: DateParameter
|
|
50
51
|
factory: CreateFromSource
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
from squirrels import arguments as args, connections as cn
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def main(connections: dict[str, cn.ConnectionProperties | Any], sqrl: args.ConnectionsArgs) -> None:
|
|
6
|
+
"""
|
|
7
|
+
Define sqlalchemy engines by adding them to the "connections" dictionary
|
|
8
|
+
"""
|
|
9
|
+
## SQLAlchemy URL for a connection engine
|
|
10
|
+
conn_str: str = sqrl.env_vars["SQLITE_URI"].format(project_path=sqrl.project_path)
|
|
11
|
+
|
|
12
|
+
## Assigning names to connection engines
|
|
13
|
+
connections["default"] = cn.ConnectionProperties(label="SQLite Expenses Database", type=cn.ConnectionTypeEnum.SQLALCHEMY, uri=conn_str)
|
|
14
|
+
|