squirrels 0.5.0b2__py3-none-any.whl → 0.5.0b4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of squirrels might be problematic. Click here for more details.
- dateutils/__init__.py +6 -460
- dateutils/_enums.py +25 -0
- dateutils/_implementation.py +409 -0
- dateutils/types.py +6 -0
- squirrels/__init__.py +9 -13
- squirrels/_api_routes/__init__.py +5 -0
- squirrels/_api_routes/auth.py +262 -0
- squirrels/_api_routes/base.py +154 -0
- squirrels/_api_routes/dashboards.py +142 -0
- squirrels/_api_routes/data_management.py +103 -0
- squirrels/_api_routes/datasets.py +242 -0
- squirrels/_api_routes/oauth2.py +300 -0
- squirrels/_api_routes/project.py +214 -0
- squirrels/_api_server.py +145 -748
- squirrels/_arguments/__init__.py +0 -0
- squirrels/{arguments → _arguments}/init_time_args.py +7 -2
- squirrels/{arguments → _arguments}/run_time_args.py +4 -26
- squirrels/_auth.py +646 -93
- squirrels/_connection_set.py +5 -5
- squirrels/_constants.py +7 -1
- squirrels/{_dashboards_io.py → _dashboards.py} +87 -6
- squirrels/_data_sources.py +564 -0
- squirrels/_exceptions.py +9 -37
- squirrels/_initializer.py +31 -26
- squirrels/_manifest.py +5 -5
- squirrels/_model_builder.py +1 -1
- squirrels/_model_configs.py +2 -2
- squirrels/_model_queries.py +1 -1
- squirrels/_models.py +40 -27
- squirrels/{package_data → _package_data}/base_project/.env +1 -0
- squirrels/{package_data → _package_data}/base_project/.env.example +1 -0
- squirrels/{package_data → _package_data}/base_project/dashboards/dashboard_example.py +4 -4
- squirrels/{package_data → _package_data}/base_project/dashboards/dashboard_example.yml +2 -2
- squirrels/_package_data/base_project/macros/macros_example.sql +17 -0
- squirrels/{package_data → _package_data}/base_project/models/builds/build_example.py +2 -2
- squirrels/{package_data → _package_data}/base_project/models/builds/build_example.sql +1 -1
- squirrels/{package_data → _package_data}/base_project/models/dbviews/dbview_example.sql +1 -1
- squirrels/_package_data/base_project/models/federates/federate_example.py +41 -0
- squirrels/_package_data/base_project/models/federates/federate_example.sql +25 -0
- squirrels/{package_data → _package_data}/base_project/models/federates/federate_example.yml +6 -6
- squirrels/{package_data → _package_data}/base_project/parameters.yml +9 -8
- squirrels/_package_data/base_project/pyconfigs/connections.py +14 -0
- squirrels/{package_data → _package_data}/base_project/pyconfigs/context.py +14 -16
- squirrels/_package_data/base_project/pyconfigs/parameters.py +106 -0
- squirrels/_package_data/base_project/pyconfigs/user.py +51 -0
- squirrels/_package_data/templates/dataset_results.html +112 -0
- squirrels/_package_data/templates/oauth_login.html +271 -0
- squirrels/_parameter_configs.py +35 -35
- squirrels/_parameter_options.py +348 -0
- squirrels/_parameter_sets.py +47 -37
- squirrels/_parameters.py +1664 -0
- squirrels/_project.py +76 -32
- squirrels/_py_module.py +3 -2
- squirrels/_schemas/__init__.py +0 -0
- squirrels/_schemas/auth_models.py +144 -0
- squirrels/_schemas/query_param_models.py +67 -0
- squirrels/{_api_response_models.py → _schemas/response_models.py} +12 -8
- squirrels/_utils.py +38 -4
- squirrels/arguments.py +2 -0
- squirrels/auth.py +1 -0
- squirrels/connections.py +1 -0
- squirrels/dashboards.py +1 -82
- squirrels/data_sources.py +8 -563
- squirrels/parameter_options.py +8 -348
- squirrels/parameters.py +9 -1266
- squirrels/types.py +11 -0
- {squirrels-0.5.0b2.dist-info → squirrels-0.5.0b4.dist-info}/METADATA +4 -1
- squirrels-0.5.0b4.dist-info/RECORD +94 -0
- squirrels/package_data/base_project/macros/macros_example.sql +0 -15
- squirrels/package_data/base_project/models/federates/federate_example.py +0 -44
- squirrels/package_data/base_project/models/federates/federate_example.sql +0 -17
- squirrels/package_data/base_project/pyconfigs/connections.py +0 -14
- squirrels/package_data/base_project/pyconfigs/parameters.py +0 -93
- squirrels/package_data/base_project/pyconfigs/user.py +0 -23
- squirrels-0.5.0b2.dist-info/RECORD +0 -70
- /squirrels/{dataset_result.py → _dataset_types.py} +0 -0
- /squirrels/{package_data → _package_data}/base_project/assets/expenses.db +0 -0
- /squirrels/{package_data → _package_data}/base_project/assets/weather.db +0 -0
- /squirrels/{package_data → _package_data}/base_project/connections.yml +0 -0
- /squirrels/{package_data → _package_data}/base_project/docker/.dockerignore +0 -0
- /squirrels/{package_data → _package_data}/base_project/docker/Dockerfile +0 -0
- /squirrels/{package_data → _package_data}/base_project/docker/compose.yml +0 -0
- /squirrels/{package_data → _package_data}/base_project/duckdb_init.sql +0 -0
- /squirrels/{package_data/base_project/.gitignore → _package_data/base_project/gitignore} +0 -0
- /squirrels/{package_data → _package_data}/base_project/models/builds/build_example.yml +0 -0
- /squirrels/{package_data → _package_data}/base_project/models/dbviews/dbview_example.yml +0 -0
- /squirrels/{package_data → _package_data}/base_project/models/sources.yml +0 -0
- /squirrels/{package_data → _package_data}/base_project/seeds/seed_categories.csv +0 -0
- /squirrels/{package_data → _package_data}/base_project/seeds/seed_categories.yml +0 -0
- /squirrels/{package_data → _package_data}/base_project/seeds/seed_subcategories.csv +0 -0
- /squirrels/{package_data → _package_data}/base_project/seeds/seed_subcategories.yml +0 -0
- /squirrels/{package_data → _package_data}/base_project/squirrels.yml.j2 +0 -0
- /squirrels/{package_data → _package_data}/base_project/tmp/.gitignore +0 -0
- {squirrels-0.5.0b2.dist-info → squirrels-0.5.0b4.dist-info}/WHEEL +0 -0
- {squirrels-0.5.0b2.dist-info → squirrels-0.5.0b4.dist-info}/entry_points.txt +0 -0
- {squirrels-0.5.0b2.dist-info → squirrels-0.5.0b4.dist-info}/licenses/LICENSE +0 -0
squirrels/_initializer.py
CHANGED
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
from typing import Optional
|
|
2
2
|
from datetime import datetime
|
|
3
|
+
from pathlib import Path
|
|
3
4
|
import inquirer, os, shutil, secrets
|
|
4
5
|
|
|
5
6
|
from . import _constants as c, _utils as u
|
|
6
7
|
|
|
7
|
-
base_proj_dir =
|
|
8
|
+
base_proj_dir = Path(os.path.dirname(__file__), c.PACKAGE_DATA_FOLDER, c.BASE_PROJECT_FOLDER)
|
|
8
9
|
|
|
9
10
|
TMP_FOLDER = "tmp"
|
|
10
11
|
|
|
@@ -14,22 +15,23 @@ class Initializer:
|
|
|
14
15
|
self.project_name = project_name if not use_curr_dir else None
|
|
15
16
|
self.use_curr_dir = use_curr_dir
|
|
16
17
|
|
|
17
|
-
def _path_exists(self, filepath:
|
|
18
|
+
def _path_exists(self, filepath: Path) -> bool:
|
|
18
19
|
return os.path.exists(filepath)
|
|
19
20
|
|
|
20
|
-
def _files_have_same_content(self, file1:
|
|
21
|
+
def _files_have_same_content(self, file1: Path, file2: Path) -> bool:
|
|
21
22
|
with open(file1, 'rb') as f1, open(file2, 'rb') as f2:
|
|
22
23
|
return f1.read() == f2.read()
|
|
23
24
|
|
|
24
|
-
def _add_timestamp_to_filename(self, path:
|
|
25
|
+
def _add_timestamp_to_filename(self, path: Path) -> Path:
|
|
25
26
|
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
|
26
27
|
new_filename = f"{path.stem}_{timestamp}{path.suffix}"
|
|
27
28
|
return path.with_name(new_filename)
|
|
28
29
|
|
|
29
|
-
def _copy_file(self, filepath:
|
|
30
|
-
|
|
30
|
+
def _copy_file(self, filepath: Path, *, src_folder: str = "", src_file: Path | None = None):
|
|
31
|
+
src_file = src_file if src_file is not None else filepath
|
|
32
|
+
src_path = Path(base_proj_dir, src_folder, src_file)
|
|
31
33
|
|
|
32
|
-
filepath2 =
|
|
34
|
+
filepath2 = Path(self.project_name, filepath) if self.project_name else filepath
|
|
33
35
|
dest_dir = os.path.dirname(filepath2)
|
|
34
36
|
if dest_dir != "":
|
|
35
37
|
os.makedirs(dest_dir, exist_ok=True)
|
|
@@ -51,38 +53,38 @@ class Initializer:
|
|
|
51
53
|
shutil.copy(src_path, filepath2)
|
|
52
54
|
|
|
53
55
|
def _copy_macros_file(self, filepath: str):
|
|
54
|
-
self._copy_file(
|
|
56
|
+
self._copy_file(Path(c.MACROS_FOLDER, filepath))
|
|
55
57
|
|
|
56
58
|
def _copy_models_file(self, filepath: str):
|
|
57
|
-
self._copy_file(
|
|
59
|
+
self._copy_file(Path(c.MODELS_FOLDER, filepath))
|
|
58
60
|
|
|
59
61
|
def _copy_build_file(self, filepath: str):
|
|
60
|
-
self._copy_file(
|
|
62
|
+
self._copy_file(Path(c.MODELS_FOLDER, c.BUILDS_FOLDER, filepath))
|
|
61
63
|
|
|
62
64
|
def _copy_dbview_file(self, filepath: str):
|
|
63
|
-
self._copy_file(
|
|
65
|
+
self._copy_file(Path(c.MODELS_FOLDER, c.DBVIEWS_FOLDER, filepath))
|
|
64
66
|
|
|
65
67
|
def _copy_federate_file(self, filepath: str):
|
|
66
|
-
self._copy_file(
|
|
68
|
+
self._copy_file(Path(c.MODELS_FOLDER, c.FEDERATES_FOLDER, filepath))
|
|
67
69
|
|
|
68
70
|
def _copy_database_file(self, filepath: str):
|
|
69
|
-
self._copy_file(
|
|
71
|
+
self._copy_file(Path(c.DATABASE_FOLDER, filepath))
|
|
70
72
|
|
|
71
73
|
def _copy_pyconfig_file(self, filepath: str):
|
|
72
|
-
self._copy_file(
|
|
74
|
+
self._copy_file(Path(c.PYCONFIGS_FOLDER, filepath))
|
|
73
75
|
|
|
74
76
|
def _copy_seed_file(self, filepath: str):
|
|
75
|
-
self._copy_file(
|
|
77
|
+
self._copy_file(Path(c.SEEDS_FOLDER, filepath))
|
|
76
78
|
|
|
77
79
|
def _copy_dashboard_file(self, filepath: str):
|
|
78
|
-
self._copy_file(
|
|
80
|
+
self._copy_file(Path(c.DASHBOARDS_FOLDER, filepath))
|
|
79
81
|
|
|
80
82
|
def _create_manifest_file(self, has_connections: bool, has_parameters: bool):
|
|
81
83
|
def get_content(file_name: Optional[str]) -> str:
|
|
82
84
|
if file_name is None:
|
|
83
85
|
return ""
|
|
84
86
|
|
|
85
|
-
yaml_path =
|
|
87
|
+
yaml_path = Path(base_proj_dir, file_name)
|
|
86
88
|
return yaml_path.read_text()
|
|
87
89
|
|
|
88
90
|
file_name_dict = {
|
|
@@ -93,10 +95,10 @@ class Initializer:
|
|
|
93
95
|
|
|
94
96
|
manifest_template = get_content(c.MANIFEST_JINJA_FILE)
|
|
95
97
|
manifest_content = u.render_string(manifest_template, **substitutions)
|
|
96
|
-
output_path =
|
|
98
|
+
output_path = Path(base_proj_dir, TMP_FOLDER, c.MANIFEST_FILE)
|
|
97
99
|
output_path.write_text(manifest_content)
|
|
98
100
|
|
|
99
|
-
self._copy_file(
|
|
101
|
+
self._copy_file(Path(c.MANIFEST_FILE), src_folder=TMP_FOLDER)
|
|
100
102
|
|
|
101
103
|
def _copy_dotenv_files(self, admin_password: str | None = None):
|
|
102
104
|
substitutions = {
|
|
@@ -104,14 +106,17 @@ class Initializer:
|
|
|
104
106
|
"random_admin_password": admin_password if admin_password else secrets.token_urlsafe(8),
|
|
105
107
|
}
|
|
106
108
|
|
|
107
|
-
dotenv_path =
|
|
109
|
+
dotenv_path = Path(base_proj_dir, c.DOTENV_FILE)
|
|
108
110
|
contents = u.render_string(dotenv_path.read_text(), **substitutions)
|
|
109
111
|
|
|
110
|
-
output_path =
|
|
112
|
+
output_path = Path(base_proj_dir, TMP_FOLDER, c.DOTENV_FILE)
|
|
111
113
|
output_path.write_text(contents)
|
|
112
114
|
|
|
113
|
-
self._copy_file(
|
|
114
|
-
self._copy_file(
|
|
115
|
+
self._copy_file(Path(c.DOTENV_FILE), src_folder=TMP_FOLDER)
|
|
116
|
+
self._copy_file(Path(c.DOTENV_FILE + ".example"))
|
|
117
|
+
|
|
118
|
+
def _copy_gitignore_file(self):
|
|
119
|
+
self._copy_file(Path(c.GITIGNORE_FILE), src_file=Path("gitignore"))
|
|
115
120
|
|
|
116
121
|
def init_project(self, args):
|
|
117
122
|
options = ["connections", "parameters", "build", "federate", "dashboard", "admin_password"]
|
|
@@ -127,7 +132,7 @@ class Initializer:
|
|
|
127
132
|
self.project_name = answers['project_name']
|
|
128
133
|
|
|
129
134
|
answers = { x: getattr(args, x) for x in options }
|
|
130
|
-
if DASHBOARD
|
|
135
|
+
if answers.get(DASHBOARD) is not None:
|
|
131
136
|
answers[DASHBOARD] = (answers[DASHBOARD] == 'y') # convert 'y' or 'n' to boolean
|
|
132
137
|
|
|
133
138
|
if not args.use_defaults:
|
|
@@ -221,7 +226,7 @@ class Initializer:
|
|
|
221
226
|
self._copy_dotenv_files(admin_password)
|
|
222
227
|
self._create_manifest_file(connections_use_yaml, parameters_use_yaml)
|
|
223
228
|
|
|
224
|
-
self.
|
|
229
|
+
self._copy_gitignore_file()
|
|
225
230
|
|
|
226
231
|
if connections_use_py:
|
|
227
232
|
self._copy_pyconfig_file(c.CONNECTIONS_FILE)
|
|
@@ -272,7 +277,7 @@ class Initializer:
|
|
|
272
277
|
print(f"You may also run `sqrl get-file {c.GITIGNORE_FILE}` to add a sample {c.GITIGNORE_FILE} file to your project.")
|
|
273
278
|
print()
|
|
274
279
|
elif args.file_name == c.GITIGNORE_FILE:
|
|
275
|
-
self.
|
|
280
|
+
self._copy_gitignore_file()
|
|
276
281
|
elif args.file_name == c.MANIFEST_FILE:
|
|
277
282
|
self._create_manifest_file(not args.no_connections, args.parameters)
|
|
278
283
|
elif args.file_name in (c.USER_FILE, c.CONNECTIONS_FILE, c.PARAMETERS_FILE, c.CONTEXT_FILE):
|
squirrels/_manifest.py
CHANGED
|
@@ -39,7 +39,7 @@ class _ConfigWithNameBaseModel(BaseModel):
|
|
|
39
39
|
name: str
|
|
40
40
|
|
|
41
41
|
|
|
42
|
-
class
|
|
42
|
+
class ConnectionTypeEnum(Enum):
|
|
43
43
|
SQLALCHEMY = "sqlalchemy"
|
|
44
44
|
CONNECTORX = "connectorx"
|
|
45
45
|
ADBC = "adbc"
|
|
@@ -54,7 +54,7 @@ class ConnectionProperties(BaseModel):
|
|
|
54
54
|
uri: The URI for the connection
|
|
55
55
|
"""
|
|
56
56
|
label: str | None = None
|
|
57
|
-
type:
|
|
57
|
+
type: ConnectionTypeEnum = Field(default=ConnectionTypeEnum.SQLALCHEMY)
|
|
58
58
|
uri: str
|
|
59
59
|
sa_create_engine_args: dict[str, Any] = Field(default_factory=dict)
|
|
60
60
|
|
|
@@ -64,14 +64,14 @@ class ConnectionProperties(BaseModel):
|
|
|
64
64
|
Creates and caches a SQLAlchemy engine if the connection type is sqlalchemy.
|
|
65
65
|
Returns None for other connection types.
|
|
66
66
|
"""
|
|
67
|
-
if self.type ==
|
|
67
|
+
if self.type == ConnectionTypeEnum.SQLALCHEMY:
|
|
68
68
|
return create_engine(self.uri, **self.sa_create_engine_args)
|
|
69
69
|
else:
|
|
70
70
|
raise ValueError(f'Connection type "{self.type}" does not support engine property')
|
|
71
71
|
|
|
72
72
|
@cached_property
|
|
73
73
|
def dialect(self) -> str:
|
|
74
|
-
if self.type ==
|
|
74
|
+
if self.type == ConnectionTypeEnum.SQLALCHEMY:
|
|
75
75
|
dialect = self.engine.dialect.name
|
|
76
76
|
else:
|
|
77
77
|
url = urlparse(self.uri)
|
|
@@ -83,7 +83,7 @@ class ConnectionProperties(BaseModel):
|
|
|
83
83
|
|
|
84
84
|
@cached_property
|
|
85
85
|
def attach_uri_for_duckdb(self) -> str | None:
|
|
86
|
-
if self.type ==
|
|
86
|
+
if self.type == ConnectionTypeEnum.SQLALCHEMY:
|
|
87
87
|
url = self.engine.url
|
|
88
88
|
host = url.host
|
|
89
89
|
port = url.port
|
squirrels/_model_builder.py
CHANGED
|
@@ -68,7 +68,7 @@ class ModelBuilder:
|
|
|
68
68
|
# If the development copy is already in use, a concurrent build is not allowed
|
|
69
69
|
duckdb_dev_lock_path = u.Path(self._duckdb_venv_path + ".dev.lock")
|
|
70
70
|
if duckdb_dev_lock_path.exists():
|
|
71
|
-
raise InvalidInputError(
|
|
71
|
+
raise InvalidInputError(409, "Concurrent build not allowed", "An existing build process is already running and a concurrent build is not allowed")
|
|
72
72
|
duckdb_dev_lock_path.touch(exist_ok=False)
|
|
73
73
|
|
|
74
74
|
# Ensure the lock file is deleted even if an exception is raised
|
squirrels/_model_configs.py
CHANGED
|
@@ -57,7 +57,7 @@ class BuildModelConfig(QueryModelConfig):
|
|
|
57
57
|
else:
|
|
58
58
|
raise ValueError(f"Invalid materialization: {self.materialization}")
|
|
59
59
|
|
|
60
|
-
create_prefix = f"CREATE OR REPLACE {materialization} {model_name} AS\n"
|
|
60
|
+
create_prefix = f"CREATE OR REPLACE {materialization} {model_name} AS\n\n"
|
|
61
61
|
return create_prefix + select_query
|
|
62
62
|
|
|
63
63
|
|
|
@@ -70,5 +70,5 @@ class FederateModelConfig(QueryModelConfig):
|
|
|
70
70
|
|
|
71
71
|
def get_sql_for_create(self, model_name: str, select_query: str) -> str:
|
|
72
72
|
materialization = "TABLE" if self.eager else "VIEW"
|
|
73
|
-
create_prefix = f"CREATE {materialization} {model_name} AS\n"
|
|
73
|
+
create_prefix = f"CREATE {materialization} {model_name} AS\n\n"
|
|
74
74
|
return create_prefix + select_query
|
squirrels/_model_queries.py
CHANGED
|
@@ -3,7 +3,7 @@ from dataclasses import dataclass, field
|
|
|
3
3
|
from typing import Callable, Generic, TypeVar, Any
|
|
4
4
|
import polars as pl, pandas as pd
|
|
5
5
|
|
|
6
|
-
from .
|
|
6
|
+
from ._arguments.run_time_args import BuildModelArgs
|
|
7
7
|
from ._model_configs import ModelConfig
|
|
8
8
|
|
|
9
9
|
|
squirrels/_models.py
CHANGED
|
@@ -7,9 +7,10 @@ from pathlib import Path
|
|
|
7
7
|
import asyncio, os, re, time, duckdb, sqlglot
|
|
8
8
|
import polars as pl, pandas as pd, networkx as nx
|
|
9
9
|
|
|
10
|
-
from . import _constants as c, _utils as u, _py_module as pm, _model_queries as mq, _model_configs as mc, _sources as src
|
|
10
|
+
from . import _constants as c, _utils as u, _py_module as pm, _model_queries as mq, _model_configs as mc, _sources as src
|
|
11
|
+
from ._schemas import response_models as rm
|
|
11
12
|
from ._exceptions import FileExecutionError, InvalidInputError
|
|
12
|
-
from .
|
|
13
|
+
from ._arguments.run_time_args import ContextArgs, ModelArgs, BuildModelArgs
|
|
13
14
|
from ._auth import BaseUser
|
|
14
15
|
from ._connection_set import ConnectionsArgs, ConnectionSet, ConnectionProperties
|
|
15
16
|
from ._manifest import DatasetConfig
|
|
@@ -173,7 +174,7 @@ class StaticModel(DataModel):
|
|
|
173
174
|
try:
|
|
174
175
|
return self._load_duckdb_view_to_python_df(local_conn, use_venv=True)
|
|
175
176
|
except Exception as e:
|
|
176
|
-
raise InvalidInputError(
|
|
177
|
+
raise InvalidInputError(409, f'Dependent data model not found.', f'Model "{self.name}" depends on static data models that cannot be found. Trying building the virtual data environment first.')
|
|
177
178
|
finally:
|
|
178
179
|
local_conn.close()
|
|
179
180
|
|
|
@@ -253,8 +254,12 @@ class SourceModel(StaticModel):
|
|
|
253
254
|
connection_props = self.conn_set.get_connection(conn_name)
|
|
254
255
|
if isinstance(connection_props, ConnectionProperties):
|
|
255
256
|
dialect = connection_props.dialect
|
|
257
|
+
attach_uri = connection_props.attach_uri_for_duckdb
|
|
256
258
|
else:
|
|
257
|
-
raise u.ConfigurationError(f'Unable to use connection "{conn_name}" for source "{self.name}"')
|
|
259
|
+
raise u.ConfigurationError(f'Unable to use connection "{conn_name}" for source "{self.name}". Connection "{conn_name}" must be a ConnectionProperties object')
|
|
260
|
+
|
|
261
|
+
if attach_uri is None:
|
|
262
|
+
raise u.ConfigurationError(f'Loading to duckdb is not supported for source "{self.name}" since its connection "{conn_name}" uses an unsupported dialect')
|
|
258
263
|
|
|
259
264
|
result = u.run_duckdb_stmt(self.logger, local_conn, f"FROM (SHOW DATABASES) WHERE database_name = 'db_{conn_name}'").fetchone()
|
|
260
265
|
if result is None:
|
|
@@ -351,7 +356,7 @@ class QueryModel(DataModel):
|
|
|
351
356
|
def _get_compile_sql_model_args_from_ctx_args(
|
|
352
357
|
self, ctx: dict[str, Any], ctx_args: ContextArgs
|
|
353
358
|
) -> dict[str, Any]:
|
|
354
|
-
is_placeholder = lambda placeholder: placeholder in ctx_args.
|
|
359
|
+
is_placeholder = lambda placeholder: placeholder in ctx_args._placeholders_copy
|
|
355
360
|
kwargs = {
|
|
356
361
|
"proj_vars": ctx_args.proj_vars, "env_vars": ctx_args.env_vars, "user": ctx_args.user, "prms": ctx_args.prms,
|
|
357
362
|
"traits": ctx_args.traits, "ctx": ctx, "is_placeholder": is_placeholder, "set_placeholder": ctx_args.set_placeholder,
|
|
@@ -424,6 +429,11 @@ class QueryModel(DataModel):
|
|
|
424
429
|
dependent_model_names.add(self.name)
|
|
425
430
|
for dep_model in self.upstreams.values():
|
|
426
431
|
dep_model.retrieve_dependent_query_models(dependent_model_names)
|
|
432
|
+
|
|
433
|
+
def _log_sql_to_run(self, sql: str, placeholders: dict[str, Any]) -> None:
|
|
434
|
+
log_msg = f"SQL to run for model '{self.name}':\n{sql}"
|
|
435
|
+
log_msg += f"\n\n(with placeholders: {placeholders})"
|
|
436
|
+
self.logger.info(log_msg)
|
|
427
437
|
|
|
428
438
|
|
|
429
439
|
@dataclass
|
|
@@ -460,6 +470,7 @@ class DbviewModel(QueryModel):
|
|
|
460
470
|
return "{{ source(\"" + source_name + "\") }}"
|
|
461
471
|
|
|
462
472
|
kwargs["source"] = source
|
|
473
|
+
kwargs["ref"] = source
|
|
463
474
|
return kwargs
|
|
464
475
|
|
|
465
476
|
def _get_duckdb_query(self, read_dialect: str, query: str) -> str:
|
|
@@ -476,17 +487,15 @@ class DbviewModel(QueryModel):
|
|
|
476
487
|
connection_props = self.conn_set.get_connection(connection_name)
|
|
477
488
|
|
|
478
489
|
if self.model_config.translate_to_duckdb and isinstance(connection_props, ConnectionProperties):
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
"source": source_func, "ref": source_func
|
|
490
|
+
macros = {
|
|
491
|
+
"source": lambda source_name: "venv." + source_name
|
|
482
492
|
}
|
|
483
493
|
compiled_query2 = self._get_compiled_sql_query_str(compiled_query_str, macros)
|
|
484
494
|
compiled_query_str = self._get_duckdb_query(connection_props.dialect, compiled_query2)
|
|
485
495
|
is_duckdb = True
|
|
486
496
|
else:
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
"source": source_func, "ref": source_func
|
|
497
|
+
macros = {
|
|
498
|
+
"source": lambda source_name: self.sources[source_name].get_table()
|
|
490
499
|
}
|
|
491
500
|
compiled_query_str = self._get_compiled_sql_query_str(compiled_query_str, macros)
|
|
492
501
|
is_duckdb = False
|
|
@@ -520,19 +529,21 @@ class DbviewModel(QueryModel):
|
|
|
520
529
|
if is_duckdb:
|
|
521
530
|
local_conn = conn.cursor()
|
|
522
531
|
try:
|
|
523
|
-
self.logger.info(f"Running
|
|
532
|
+
self.logger.info(f"Running dbview '{self.name}' on duckdb")
|
|
524
533
|
return local_conn.sql(query, params=placeholders).pl()
|
|
525
534
|
except duckdb.CatalogException as e:
|
|
526
|
-
raise InvalidInputError(
|
|
535
|
+
raise InvalidInputError(409, f'Dependent data model not found.', f'Model "{self.name}" depends on static data models that cannot be found. Trying building the virtual data environment first.')
|
|
527
536
|
except Exception as e:
|
|
528
537
|
raise RuntimeError(e)
|
|
529
538
|
finally:
|
|
530
539
|
local_conn.close()
|
|
531
540
|
else:
|
|
532
|
-
|
|
541
|
+
self.logger.info(f"Running dbview '{self.name}' on connection: {connection_name}")
|
|
542
|
+
return self.conn_set.run_sql_query_from_conn_name(query, connection_name, placeholders)
|
|
533
543
|
except RuntimeError as e:
|
|
534
544
|
raise FileExecutionError(f'Failed to run dbview sql model "{self.name}"', e)
|
|
535
545
|
|
|
546
|
+
self._log_sql_to_run(query, placeholders)
|
|
536
547
|
result = await asyncio.to_thread(run_sql_query_on_connection, is_duckdb, query, placeholders)
|
|
537
548
|
self.result = result.lazy()
|
|
538
549
|
|
|
@@ -582,7 +593,7 @@ class FederateModel(QueryModel):
|
|
|
582
593
|
connections = self.conn_set.get_connections_as_dict()
|
|
583
594
|
|
|
584
595
|
def run_external_sql(connection_name: str, sql_query: str) -> pl.DataFrame:
|
|
585
|
-
return self._run_sql_query_on_connection(connection_name, sql_query, ctx_args.
|
|
596
|
+
return self._run_sql_query_on_connection(connection_name, sql_query, ctx_args._placeholders_copy)
|
|
586
597
|
|
|
587
598
|
conn_args = ConnectionsArgs(ctx_args.project_path, ctx_args.proj_vars, ctx_args.env_vars)
|
|
588
599
|
build_model_args = BuildModelArgs(conn_args, connections, dependencies, self._ref_for_python, run_external_sql)
|
|
@@ -638,17 +649,19 @@ class FederateModel(QueryModel):
|
|
|
638
649
|
query = compiled_query.query
|
|
639
650
|
|
|
640
651
|
def create_table(local_conn: duckdb.DuckDBPyConnection):
|
|
641
|
-
|
|
642
|
-
|
|
652
|
+
# DuckDB doesn't support specifying named parameters that are not used in the query, so filtering them out
|
|
653
|
+
placeholder_exists = lambda key: re.search(r"\$" + key + r"(?!\w)", query)
|
|
654
|
+
existing_placeholders = {key: value for key, value in placeholders.items() if placeholder_exists(key)}
|
|
643
655
|
|
|
644
656
|
create_query = self.model_config.get_sql_for_create(self.name, query)
|
|
657
|
+
self._log_sql_to_run(create_query, existing_placeholders)
|
|
645
658
|
try:
|
|
646
659
|
return local_conn.execute(create_query, existing_placeholders)
|
|
647
660
|
except duckdb.CatalogException as e:
|
|
648
|
-
raise InvalidInputError(
|
|
661
|
+
raise InvalidInputError(409, f'Dependent data model not found.', f'Model "{self.name}" depends on static data models that cannot be found. Trying building the virtual data environment first.')
|
|
649
662
|
except Exception as e:
|
|
650
663
|
if self.name == "__fake_target":
|
|
651
|
-
raise InvalidInputError(
|
|
664
|
+
raise InvalidInputError(400, "Invalid SQL query", f"Failed to run provided SQL query")
|
|
652
665
|
else:
|
|
653
666
|
raise FileExecutionError(f'Failed to run federate sql model "{self.name}"', e) from e
|
|
654
667
|
|
|
@@ -774,7 +787,7 @@ class BuildModel(StaticModel, QueryModel):
|
|
|
774
787
|
create_query = self.model_config.get_sql_for_build(self.name, query)
|
|
775
788
|
local_conn = conn.cursor()
|
|
776
789
|
try:
|
|
777
|
-
return u.run_duckdb_stmt(self.logger, local_conn, create_query)
|
|
790
|
+
return u.run_duckdb_stmt(self.logger, local_conn, create_query, model_name=self.name)
|
|
778
791
|
except Exception as e:
|
|
779
792
|
raise FileExecutionError(f'Failed to build static sql model "{self.name}"', e) from e
|
|
780
793
|
finally:
|
|
@@ -922,7 +935,7 @@ class DAG:
|
|
|
922
935
|
|
|
923
936
|
self._compile_models(context, ctx_args, recurse)
|
|
924
937
|
|
|
925
|
-
self.placeholders = ctx_args.
|
|
938
|
+
self.placeholders = ctx_args._placeholders_copy
|
|
926
939
|
if runquery:
|
|
927
940
|
await self._run_models()
|
|
928
941
|
|
|
@@ -948,24 +961,24 @@ class DAG:
|
|
|
948
961
|
|
|
949
962
|
return G
|
|
950
963
|
|
|
951
|
-
def get_all_data_models(self) -> list[
|
|
964
|
+
def get_all_data_models(self) -> list[rm.DataModelItem]:
|
|
952
965
|
data_models = []
|
|
953
966
|
for model_name, model in self.models_dict.items():
|
|
954
967
|
is_queryable = model.is_queryable
|
|
955
|
-
data_model =
|
|
968
|
+
data_model = rm.DataModelItem(name=model_name, model_type=model.model_type.value, config=model.model_config, is_queryable=is_queryable)
|
|
956
969
|
data_models.append(data_model)
|
|
957
970
|
return data_models
|
|
958
971
|
|
|
959
|
-
def get_all_model_lineage(self) -> list[
|
|
972
|
+
def get_all_model_lineage(self) -> list[rm.LineageRelation]:
|
|
960
973
|
model_lineage = []
|
|
961
974
|
for model_name, model in self.models_dict.items():
|
|
962
975
|
if not isinstance(model, QueryModel):
|
|
963
976
|
continue
|
|
964
977
|
for dep_model_name in model.model_config.depends_on:
|
|
965
978
|
edge_type = "buildtime" if isinstance(model, BuildModel) else "runtime"
|
|
966
|
-
source_model =
|
|
967
|
-
target_model =
|
|
968
|
-
model_lineage.append(
|
|
979
|
+
source_model = rm.LineageNode(name=dep_model_name, type="model")
|
|
980
|
+
target_model = rm.LineageNode(name=model_name, type="model")
|
|
981
|
+
model_lineage.append(rm.LineageRelation(type=edge_type, source=source_model, target=target_model))
|
|
969
982
|
return model_lineage
|
|
970
983
|
|
|
971
984
|
|
|
@@ -10,6 +10,7 @@ SQRL_SECRET__ADMIN_PASSWORD="{{ random_admin_password }}"
|
|
|
10
10
|
# (default values are shown below)
|
|
11
11
|
SQRL_AUTH__DB_FILE_PATH="target/auth.sqlite"
|
|
12
12
|
SQRL_AUTH__TOKEN_EXPIRE_MINUTES="30"
|
|
13
|
+
SQRL_AUTH__ALLOWED_ORIGINS_FOR_COOKIES="https://squirrels-analytics.github.io"
|
|
13
14
|
|
|
14
15
|
SQRL_PARAMETERS__CACHE_SIZE="1024"
|
|
15
16
|
SQRL_PARAMETERS__CACHE_TTL_MINUTES="60"
|
|
@@ -10,6 +10,7 @@ SQRL_SECRET__ADMIN_PASSWORD=""
|
|
|
10
10
|
# (default values are shown below)
|
|
11
11
|
SQRL_AUTH__DB_FILE_PATH="target/auth.sqlite"
|
|
12
12
|
SQRL_AUTH__TOKEN_EXPIRE_MINUTES="30"
|
|
13
|
+
SQRL_AUTH__ALLOWED_ORIGINS_FOR_COOKIES="https://squirrels-analytics.github.io"
|
|
13
14
|
|
|
14
15
|
SQRL_PARAMETERS__CACHE_SIZE="1024"
|
|
15
16
|
SQRL_PARAMETERS__CACHE_TTL_MINUTES="60"
|
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
from squirrels import
|
|
1
|
+
from squirrels import arguments as args, dashboards as d
|
|
2
2
|
from matplotlib import pyplot as plt, figure as f, axes as a
|
|
3
3
|
|
|
4
4
|
|
|
5
|
-
async def main(sqrl: DashboardArgs) -> d.PngDashboard:
|
|
6
|
-
spending_by_month_df = await sqrl.dataset("federate_dataset_example", fixed_parameters={"group_by": "
|
|
7
|
-
spending_by_subcategory_df = await sqrl.dataset("federate_dataset_example", fixed_parameters={"group_by": "
|
|
5
|
+
async def main(sqrl: args.DashboardArgs) -> d.PngDashboard:
|
|
6
|
+
spending_by_month_df = await sqrl.dataset("federate_dataset_example", fixed_parameters={"group_by": "month"})
|
|
7
|
+
spending_by_subcategory_df = await sqrl.dataset("federate_dataset_example", fixed_parameters={"group_by": "subcat"})
|
|
8
8
|
|
|
9
9
|
# Create a figure with two subplots
|
|
10
10
|
fig, (ax0, ax1) = plt.subplots(2, 1, figsize=(8, 8), height_ratios=(1, 2))
|
|
@@ -14,9 +14,9 @@ depends_on:
|
|
|
14
14
|
- name: dataset_example_month
|
|
15
15
|
dataset: federate_dataset_example
|
|
16
16
|
fixed_parameters:
|
|
17
|
-
- group_by:
|
|
17
|
+
- group_by: month (Month)
|
|
18
18
|
|
|
19
19
|
- name: dataset_example_subcategory
|
|
20
20
|
dataset: federate_dataset_example
|
|
21
21
|
fixed_parameters:
|
|
22
|
-
- group_by:
|
|
22
|
+
- group_by: subcat (Subcategory)
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
{%- macro date_and_amount_filters(use_from_range) -%}
|
|
2
|
+
{%- if use_from_range -%}
|
|
3
|
+
|
|
4
|
+
date >= {{ ctx.start_date_from_range | quote }}
|
|
5
|
+
AND date <= {{ ctx.end_date_from_range | quote }}
|
|
6
|
+
AND amount >= {{ ctx.min_amount_from_range }}
|
|
7
|
+
AND amount <= {{ ctx.max_amount_from_range }}
|
|
8
|
+
|
|
9
|
+
{%- else -%}
|
|
10
|
+
|
|
11
|
+
date >= {{ ctx.start_date | quote }}
|
|
12
|
+
AND date <= {{ ctx.end_date | quote }}
|
|
13
|
+
AND amount >= {{ ctx.min_amount }}
|
|
14
|
+
AND amount <= {{ ctx.max_amount }}
|
|
15
|
+
|
|
16
|
+
{%- endif -%}
|
|
17
|
+
{%- endmacro -%}
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
from squirrels import
|
|
1
|
+
from squirrels import arguments as args
|
|
2
2
|
import polars as pl, pandas as pd
|
|
3
3
|
|
|
4
4
|
|
|
5
|
-
def main(sqrl: BuildModelArgs) -> pl.LazyFrame | pl.DataFrame | pd.DataFrame:
|
|
5
|
+
def main(sqrl: args.BuildModelArgs) -> pl.LazyFrame | pl.DataFrame | pd.DataFrame:
|
|
6
6
|
"""
|
|
7
7
|
Create a build model by joining/processing sources or other build models to form a new
|
|
8
8
|
Python DataFrame (using polars LazyFrame, polars DataFrame, or pandas DataFrame).
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
from squirrels import arguments as args
|
|
2
|
+
import polars as pl, pandas as pd
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def main(sqrl: args.ModelArgs) -> pl.LazyFrame | pl.DataFrame | pd.DataFrame:
|
|
6
|
+
"""
|
|
7
|
+
Create federated models by joining/processing dependent models (sources, seeds, builds, dbviews, other federates, etc.) to
|
|
8
|
+
form a new Python DataFrame (using polars LazyFrame, polars DataFrame, or pandas DataFrame).
|
|
9
|
+
"""
|
|
10
|
+
df = sqrl.ref("build_example")
|
|
11
|
+
|
|
12
|
+
df = df.filter(
|
|
13
|
+
(pl.col("date") >= sqrl.ctx["start_date_from_range"]) &
|
|
14
|
+
(pl.col("date") <= sqrl.ctx["end_date_from_range"]) &
|
|
15
|
+
(pl.col("amount") >= sqrl.ctx["min_amount_from_range"]) &
|
|
16
|
+
(pl.col("amount") <= sqrl.ctx["max_amount_from_range"])
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
if sqrl.ctx["has_categories"]:
|
|
20
|
+
categories: list[str] = sqrl.ctx["categories"]
|
|
21
|
+
df = df.filter(pl.col("category_id").is_in(categories))
|
|
22
|
+
|
|
23
|
+
if sqrl.ctx["has_subcategories"]:
|
|
24
|
+
subcategories: list[str] = sqrl.ctx["subcategories"]
|
|
25
|
+
df = df.filter(pl.col("subcategory_id").is_in(subcategories))
|
|
26
|
+
|
|
27
|
+
dimension_cols: list[str] = sqrl.ctx["group_by_cols"]
|
|
28
|
+
df = df.group_by(dimension_cols).agg(
|
|
29
|
+
pl.sum("amount").cast(pl.Decimal(precision=15, scale=2)).alias("total_amount")
|
|
30
|
+
)
|
|
31
|
+
df = df.rename(sqrl.ctx["rename_dict"])
|
|
32
|
+
|
|
33
|
+
order_by_cols: list[str] = sqrl.ctx["order_by_cols"]
|
|
34
|
+
df = df.select(*order_by_cols, "total_amount") \
|
|
35
|
+
.sort(order_by_cols, descending=True)
|
|
36
|
+
|
|
37
|
+
if "limit" in sqrl.ctx:
|
|
38
|
+
limit: int = sqrl.ctx["limit"]
|
|
39
|
+
df = df.limit(limit)
|
|
40
|
+
|
|
41
|
+
return df
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
{#- DuckDB dialect -#}
|
|
2
|
+
|
|
3
|
+
SELECT {{ ctx.select_dim_cols | join }}
|
|
4
|
+
, CAST(SUM(amount) AS DECIMAL(15, 2)) as total_amount
|
|
5
|
+
|
|
6
|
+
{# ref() can be used on sources, seeds, builds, dbviews, or other federate models -#}
|
|
7
|
+
FROM {{ ref("build_example") }} AS a
|
|
8
|
+
|
|
9
|
+
WHERE {{ date_and_amount_filters(use_from_range=true) }}
|
|
10
|
+
{%- if ctx.has_categories %}
|
|
11
|
+
AND category_id IN ({{ ctx.categories | quote_and_join }})
|
|
12
|
+
{%- endif %}
|
|
13
|
+
{%- if ctx.has_subcategories %}
|
|
14
|
+
AND subcategory_id IN ({{ ctx.subcategories | quote_and_join }})
|
|
15
|
+
{%- endif %}
|
|
16
|
+
|
|
17
|
+
GROUP BY {{ ctx.group_by_cols | join }}
|
|
18
|
+
|
|
19
|
+
ORDER BY {{ ctx.order_by_cols_desc | join }}
|
|
20
|
+
|
|
21
|
+
{%- if ctx.limit %}
|
|
22
|
+
|
|
23
|
+
LIMIT {{ ctx.limit }}
|
|
24
|
+
|
|
25
|
+
{%- endif %}
|
|
@@ -11,7 +11,7 @@ eager: false # optional - defaults to false. Only applies to SQL m
|
|
|
11
11
|
columns:
|
|
12
12
|
- name: date
|
|
13
13
|
type: string
|
|
14
|
-
condition: parameter 'group_by' (Group By) is '
|
|
14
|
+
condition: parameter 'group_by' (Group By) is 'trans' (Transaction)
|
|
15
15
|
description: The date of the transaction in 'YYYY-MM-DD' format, in descending order
|
|
16
16
|
category: dimension
|
|
17
17
|
depends_on:
|
|
@@ -19,7 +19,7 @@ columns:
|
|
|
19
19
|
|
|
20
20
|
- name: description
|
|
21
21
|
type: string
|
|
22
|
-
condition: parameter 'group_by' (Group By) is '
|
|
22
|
+
condition: parameter 'group_by' (Group By) is 'trans' (Transaction)
|
|
23
23
|
description: The description of the transaction
|
|
24
24
|
category: dimension
|
|
25
25
|
depends_on:
|
|
@@ -27,7 +27,7 @@ columns:
|
|
|
27
27
|
|
|
28
28
|
- name: day
|
|
29
29
|
type: string
|
|
30
|
-
condition: parameter 'group_by' (Group By) is '
|
|
30
|
+
condition: parameter 'group_by' (Group By) is 'day' (Day)
|
|
31
31
|
description: The day for which the amount is aggregated by, in descending order
|
|
32
32
|
category: dimension
|
|
33
33
|
depends_on:
|
|
@@ -35,7 +35,7 @@ columns:
|
|
|
35
35
|
|
|
36
36
|
- name: month
|
|
37
37
|
type: string
|
|
38
|
-
condition: parameter 'group_by' (Group By) is '
|
|
38
|
+
condition: parameter 'group_by' (Group By) is 'month' (Month)
|
|
39
39
|
description: The month for which the amount is aggregated by, in descending order
|
|
40
40
|
category: dimension
|
|
41
41
|
depends_on:
|
|
@@ -43,7 +43,7 @@ columns:
|
|
|
43
43
|
|
|
44
44
|
- name: category
|
|
45
45
|
type: string
|
|
46
|
-
condition: parameter `group_by` (Group By) is `
|
|
46
|
+
condition: parameter `group_by` (Group By) is `trans` (Transaction), `cat` (Category), or `subcat` (Subcategory)
|
|
47
47
|
description: The category for which the amount is aggregated by
|
|
48
48
|
category: dimension
|
|
49
49
|
depends_on:
|
|
@@ -51,7 +51,7 @@ columns:
|
|
|
51
51
|
|
|
52
52
|
- name: subcategory
|
|
53
53
|
type: string
|
|
54
|
-
condition: parameter `group_by` (Group By) is `
|
|
54
|
+
condition: parameter `group_by` (Group By) is `trans` (Transaction) or `subcat` (Subcategory)
|
|
55
55
|
description: The subcategory for which the amount is aggregated by
|
|
56
56
|
category: dimension
|
|
57
57
|
depends_on:
|