squirrels 0.5.0b1__py3-none-any.whl → 0.5.0b3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dateutils/__init__.py +6 -0
- dateutils/_enums.py +25 -0
- squirrels/dateutils.py → dateutils/_implementation.py +58 -111
- dateutils/types.py +6 -0
- squirrels/__init__.py +7 -13
- squirrels/_api_server.py +5 -5
- squirrels/{arguments/init_time_args.py → _arguments/_init_time_args.py} +2 -2
- squirrels/{arguments/run_time_args.py → _arguments/_run_time_args.py} +4 -26
- squirrels/_auth.py +2 -2
- squirrels/_command_line.py +13 -9
- squirrels/_connection_set.py +5 -5
- squirrels/_constants.py +1 -1
- squirrels/_dashboard_types.py +82 -0
- squirrels/_dashboards_io.py +2 -2
- squirrels/_data_sources.py +564 -0
- squirrels/_exceptions.py +1 -1
- squirrels/_initializer.py +82 -58
- squirrels/_manifest.py +5 -5
- squirrels/_model_builder.py +2 -0
- squirrels/_model_configs.py +3 -3
- squirrels/_model_queries.py +1 -1
- squirrels/_models.py +28 -14
- squirrels/{package_data → _package_data}/base_project/dashboards/dashboard_example.py +4 -4
- squirrels/{package_data → _package_data}/base_project/dashboards/dashboard_example.yml +2 -2
- squirrels/_package_data/base_project/macros/macros_example.sql +17 -0
- squirrels/{package_data → _package_data}/base_project/models/builds/build_example.py +2 -2
- squirrels/{package_data → _package_data}/base_project/models/builds/build_example.sql +1 -1
- squirrels/{package_data → _package_data}/base_project/models/builds/build_example.yml +2 -0
- squirrels/{package_data → _package_data}/base_project/models/dbviews/dbview_example.sql +1 -1
- squirrels/_package_data/base_project/models/federates/federate_example.py +41 -0
- squirrels/_package_data/base_project/models/federates/federate_example.sql +25 -0
- squirrels/{package_data → _package_data}/base_project/models/federates/federate_example.yml +6 -6
- squirrels/{package_data → _package_data}/base_project/parameters.yml +9 -8
- squirrels/_package_data/base_project/pyconfigs/connections.py +14 -0
- squirrels/{package_data → _package_data}/base_project/pyconfigs/context.py +14 -16
- squirrels/{package_data → _package_data}/base_project/pyconfigs/parameters.py +13 -8
- squirrels/{package_data → _package_data}/base_project/pyconfigs/user.py +2 -2
- squirrels/_parameter_configs.py +34 -34
- squirrels/_parameter_options.py +348 -0
- squirrels/_parameter_sets.py +18 -18
- squirrels/_parameters.py +1266 -0
- squirrels/_project.py +37 -12
- squirrels/_utils.py +5 -3
- squirrels/arguments.py +2 -0
- squirrels/connections.py +1 -0
- squirrels/dashboards.py +1 -82
- squirrels/data_sources.py +8 -563
- squirrels/parameter_options.py +8 -348
- squirrels/parameters.py +9 -1266
- squirrels/types.py +11 -0
- {squirrels-0.5.0b1.dist-info → squirrels-0.5.0b3.dist-info}/METADATA +11 -17
- squirrels-0.5.0b3.dist-info/RECORD +80 -0
- squirrels/package_data/base_project/macros/macros_example.sql +0 -15
- squirrels/package_data/base_project/models/federates/federate_example.py +0 -44
- squirrels/package_data/base_project/models/federates/federate_example.sql +0 -17
- squirrels/package_data/base_project/pyconfigs/connections.py +0 -14
- squirrels-0.5.0b1.dist-info/RECORD +0 -70
- /squirrels/{dataset_result.py → _dataset_types.py} +0 -0
- /squirrels/{package_data → _package_data}/base_project/.env +0 -0
- /squirrels/{package_data → _package_data}/base_project/.env.example +0 -0
- /squirrels/{package_data → _package_data}/base_project/assets/expenses.db +0 -0
- /squirrels/{package_data → _package_data}/base_project/assets/weather.db +0 -0
- /squirrels/{package_data → _package_data}/base_project/connections.yml +0 -0
- /squirrels/{package_data → _package_data}/base_project/docker/.dockerignore +0 -0
- /squirrels/{package_data → _package_data}/base_project/docker/Dockerfile +0 -0
- /squirrels/{package_data → _package_data}/base_project/docker/compose.yml +0 -0
- /squirrels/{package_data → _package_data}/base_project/duckdb_init.sql +0 -0
- /squirrels/{package_data/base_project/.gitignore → _package_data/base_project/gitignore} +0 -0
- /squirrels/{package_data → _package_data}/base_project/models/dbviews/dbview_example.yml +0 -0
- /squirrels/{package_data → _package_data}/base_project/models/sources.yml +0 -0
- /squirrels/{package_data → _package_data}/base_project/seeds/seed_categories.csv +0 -0
- /squirrels/{package_data → _package_data}/base_project/seeds/seed_categories.yml +0 -0
- /squirrels/{package_data → _package_data}/base_project/seeds/seed_subcategories.csv +0 -0
- /squirrels/{package_data → _package_data}/base_project/seeds/seed_subcategories.yml +0 -0
- /squirrels/{package_data → _package_data}/base_project/squirrels.yml.j2 +0 -0
- /squirrels/{package_data → _package_data}/base_project/tmp/.gitignore +0 -0
- {squirrels-0.5.0b1.dist-info → squirrels-0.5.0b3.dist-info}/WHEEL +0 -0
- {squirrels-0.5.0b1.dist-info → squirrels-0.5.0b3.dist-info}/entry_points.txt +0 -0
- {squirrels-0.5.0b1.dist-info → squirrels-0.5.0b3.dist-info}/licenses/LICENSE +0 -0
squirrels/_initializer.py
CHANGED
|
@@ -1,35 +1,37 @@
|
|
|
1
1
|
from typing import Optional
|
|
2
2
|
from datetime import datetime
|
|
3
|
+
from pathlib import Path
|
|
3
4
|
import inquirer, os, shutil, secrets
|
|
4
5
|
|
|
5
6
|
from . import _constants as c, _utils as u
|
|
6
7
|
|
|
7
|
-
base_proj_dir =
|
|
8
|
+
base_proj_dir = Path(os.path.dirname(__file__), c.PACKAGE_DATA_FOLDER, c.BASE_PROJECT_FOLDER)
|
|
8
9
|
|
|
9
10
|
TMP_FOLDER = "tmp"
|
|
10
11
|
|
|
11
12
|
|
|
12
13
|
class Initializer:
|
|
13
|
-
def __init__(self, *, project_name: Optional[str] = None,
|
|
14
|
-
self.project_name = project_name
|
|
15
|
-
self.
|
|
14
|
+
def __init__(self, *, project_name: Optional[str] = None, use_curr_dir: bool = False):
|
|
15
|
+
self.project_name = project_name if not use_curr_dir else None
|
|
16
|
+
self.use_curr_dir = use_curr_dir
|
|
16
17
|
|
|
17
|
-
def _path_exists(self, filepath:
|
|
18
|
+
def _path_exists(self, filepath: Path) -> bool:
|
|
18
19
|
return os.path.exists(filepath)
|
|
19
20
|
|
|
20
|
-
def _files_have_same_content(self, file1:
|
|
21
|
+
def _files_have_same_content(self, file1: Path, file2: Path) -> bool:
|
|
21
22
|
with open(file1, 'rb') as f1, open(file2, 'rb') as f2:
|
|
22
23
|
return f1.read() == f2.read()
|
|
23
24
|
|
|
24
|
-
def _add_timestamp_to_filename(self, path:
|
|
25
|
+
def _add_timestamp_to_filename(self, path: Path) -> Path:
|
|
25
26
|
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
|
26
27
|
new_filename = f"{path.stem}_{timestamp}{path.suffix}"
|
|
27
28
|
return path.with_name(new_filename)
|
|
28
29
|
|
|
29
|
-
def _copy_file(self, filepath:
|
|
30
|
-
|
|
30
|
+
def _copy_file(self, filepath: Path, *, src_folder: str = "", src_file: Path | None = None):
|
|
31
|
+
src_file = src_file if src_file is not None else filepath
|
|
32
|
+
src_path = Path(base_proj_dir, src_folder, src_file)
|
|
31
33
|
|
|
32
|
-
filepath2 =
|
|
34
|
+
filepath2 = Path(self.project_name, filepath) if self.project_name else filepath
|
|
33
35
|
dest_dir = os.path.dirname(filepath2)
|
|
34
36
|
if dest_dir != "":
|
|
35
37
|
os.makedirs(dest_dir, exist_ok=True)
|
|
@@ -40,8 +42,6 @@ class Initializer:
|
|
|
40
42
|
if self._files_have_same_content(src_path, filepath2):
|
|
41
43
|
perform_copy = False
|
|
42
44
|
extra_msg = "Skipping... file contents is same as source"
|
|
43
|
-
elif self.overwrite:
|
|
44
|
-
extra_msg = "Overwriting file..."
|
|
45
45
|
else:
|
|
46
46
|
filepath2 = self._add_timestamp_to_filename(old_filepath)
|
|
47
47
|
extra_msg = f'Creating file as "{filepath2}" instead...'
|
|
@@ -53,38 +53,38 @@ class Initializer:
|
|
|
53
53
|
shutil.copy(src_path, filepath2)
|
|
54
54
|
|
|
55
55
|
def _copy_macros_file(self, filepath: str):
|
|
56
|
-
self._copy_file(
|
|
56
|
+
self._copy_file(Path(c.MACROS_FOLDER, filepath))
|
|
57
57
|
|
|
58
58
|
def _copy_models_file(self, filepath: str):
|
|
59
|
-
self._copy_file(
|
|
59
|
+
self._copy_file(Path(c.MODELS_FOLDER, filepath))
|
|
60
60
|
|
|
61
61
|
def _copy_build_file(self, filepath: str):
|
|
62
|
-
self._copy_file(
|
|
62
|
+
self._copy_file(Path(c.MODELS_FOLDER, c.BUILDS_FOLDER, filepath))
|
|
63
63
|
|
|
64
64
|
def _copy_dbview_file(self, filepath: str):
|
|
65
|
-
self._copy_file(
|
|
65
|
+
self._copy_file(Path(c.MODELS_FOLDER, c.DBVIEWS_FOLDER, filepath))
|
|
66
66
|
|
|
67
67
|
def _copy_federate_file(self, filepath: str):
|
|
68
|
-
self._copy_file(
|
|
68
|
+
self._copy_file(Path(c.MODELS_FOLDER, c.FEDERATES_FOLDER, filepath))
|
|
69
69
|
|
|
70
70
|
def _copy_database_file(self, filepath: str):
|
|
71
|
-
self._copy_file(
|
|
71
|
+
self._copy_file(Path(c.DATABASE_FOLDER, filepath))
|
|
72
72
|
|
|
73
73
|
def _copy_pyconfig_file(self, filepath: str):
|
|
74
|
-
self._copy_file(
|
|
74
|
+
self._copy_file(Path(c.PYCONFIGS_FOLDER, filepath))
|
|
75
75
|
|
|
76
76
|
def _copy_seed_file(self, filepath: str):
|
|
77
|
-
self._copy_file(
|
|
77
|
+
self._copy_file(Path(c.SEEDS_FOLDER, filepath))
|
|
78
78
|
|
|
79
79
|
def _copy_dashboard_file(self, filepath: str):
|
|
80
|
-
self._copy_file(
|
|
80
|
+
self._copy_file(Path(c.DASHBOARDS_FOLDER, filepath))
|
|
81
81
|
|
|
82
82
|
def _create_manifest_file(self, has_connections: bool, has_parameters: bool):
|
|
83
83
|
def get_content(file_name: Optional[str]) -> str:
|
|
84
84
|
if file_name is None:
|
|
85
85
|
return ""
|
|
86
86
|
|
|
87
|
-
yaml_path =
|
|
87
|
+
yaml_path = Path(base_proj_dir, file_name)
|
|
88
88
|
return yaml_path.read_text()
|
|
89
89
|
|
|
90
90
|
file_name_dict = {
|
|
@@ -95,10 +95,10 @@ class Initializer:
|
|
|
95
95
|
|
|
96
96
|
manifest_template = get_content(c.MANIFEST_JINJA_FILE)
|
|
97
97
|
manifest_content = u.render_string(manifest_template, **substitutions)
|
|
98
|
-
output_path =
|
|
98
|
+
output_path = Path(base_proj_dir, TMP_FOLDER, c.MANIFEST_FILE)
|
|
99
99
|
output_path.write_text(manifest_content)
|
|
100
100
|
|
|
101
|
-
self._copy_file(
|
|
101
|
+
self._copy_file(Path(c.MANIFEST_FILE), src_folder=TMP_FOLDER)
|
|
102
102
|
|
|
103
103
|
def _copy_dotenv_files(self, admin_password: str | None = None):
|
|
104
104
|
substitutions = {
|
|
@@ -106,52 +106,76 @@ class Initializer:
|
|
|
106
106
|
"random_admin_password": admin_password if admin_password else secrets.token_urlsafe(8),
|
|
107
107
|
}
|
|
108
108
|
|
|
109
|
-
dotenv_path =
|
|
109
|
+
dotenv_path = Path(base_proj_dir, c.DOTENV_FILE)
|
|
110
110
|
contents = u.render_string(dotenv_path.read_text(), **substitutions)
|
|
111
111
|
|
|
112
|
-
output_path =
|
|
112
|
+
output_path = Path(base_proj_dir, TMP_FOLDER, c.DOTENV_FILE)
|
|
113
113
|
output_path.write_text(contents)
|
|
114
114
|
|
|
115
|
-
self._copy_file(
|
|
116
|
-
self._copy_file(
|
|
115
|
+
self._copy_file(Path(c.DOTENV_FILE), src_folder=TMP_FOLDER)
|
|
116
|
+
self._copy_file(Path(c.DOTENV_FILE + ".example"))
|
|
117
|
+
|
|
118
|
+
def _copy_gitignore_file(self):
|
|
119
|
+
self._copy_file(Path(c.GITIGNORE_FILE), src_file=Path("gitignore"))
|
|
117
120
|
|
|
118
121
|
def init_project(self, args):
|
|
119
|
-
options = ["
|
|
120
|
-
|
|
122
|
+
options = ["connections", "parameters", "build", "federate", "dashboard", "admin_password"]
|
|
123
|
+
CONNECTIONS, PARAMETERS, BUILD, FEDERATE, DASHBOARD, ADMIN_PASSWORD = options
|
|
121
124
|
|
|
122
125
|
# Add project name prompt if not provided
|
|
123
|
-
if self.project_name is None:
|
|
126
|
+
if self.project_name is None and not args.curr_dir:
|
|
124
127
|
questions = [
|
|
125
|
-
inquirer.Text('project_name', message="What is your project name? (leave blank to create in current directory)")
|
|
128
|
+
inquirer.Text('project_name', message="What is your project folder name? (leave blank to create in current directory)")
|
|
126
129
|
]
|
|
127
130
|
answers = inquirer.prompt(questions)
|
|
128
131
|
assert isinstance(answers, dict)
|
|
129
132
|
self.project_name = answers['project_name']
|
|
130
133
|
|
|
131
134
|
answers = { x: getattr(args, x) for x in options }
|
|
132
|
-
if
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
)
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
)
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
135
|
+
if answers.get(DASHBOARD) is not None:
|
|
136
|
+
answers[DASHBOARD] = (answers[DASHBOARD] == 'y') # convert 'y' or 'n' to boolean
|
|
137
|
+
|
|
138
|
+
if not args.use_defaults:
|
|
139
|
+
questions = []
|
|
140
|
+
if answers.get(CONNECTIONS) is None:
|
|
141
|
+
questions.append(
|
|
142
|
+
inquirer.List(
|
|
143
|
+
CONNECTIONS, message=f"How would you like to configure the database connections?", choices=c.CONF_FORMAT_CHOICES
|
|
144
|
+
),
|
|
145
|
+
)
|
|
146
|
+
if answers.get(PARAMETERS) is None:
|
|
147
|
+
questions.append(
|
|
148
|
+
inquirer.List(
|
|
149
|
+
PARAMETERS, message=f"How would you like to configure the parameters?", choices=c.CONF_FORMAT_CHOICES2
|
|
150
|
+
),
|
|
151
|
+
)
|
|
152
|
+
if answers.get(BUILD) is None:
|
|
153
|
+
questions.append(
|
|
154
|
+
inquirer.List(
|
|
155
|
+
BUILD, message="What's the file format for the build model?", choices=c.FILE_TYPE_CHOICES
|
|
156
|
+
),
|
|
157
|
+
)
|
|
158
|
+
if answers.get(FEDERATE) is None:
|
|
159
|
+
questions.append(
|
|
160
|
+
inquirer.List(
|
|
161
|
+
FEDERATE, message="What's the file format for the federated model?", choices=c.FILE_TYPE_CHOICES
|
|
162
|
+
),
|
|
163
|
+
)
|
|
164
|
+
if answers.get(DASHBOARD) is None:
|
|
165
|
+
questions.append(
|
|
166
|
+
inquirer.Confirm(
|
|
167
|
+
DASHBOARD, message=f"Do you want to include a dashboard example?", default=False
|
|
168
|
+
),
|
|
169
|
+
)
|
|
170
|
+
if answers.get(ADMIN_PASSWORD) is None:
|
|
171
|
+
questions.append(
|
|
172
|
+
inquirer.Password(
|
|
173
|
+
"admin_password", message="What's the admin password? (leave blank to generate a random one)"
|
|
174
|
+
),
|
|
175
|
+
)
|
|
176
|
+
more_answers = inquirer.prompt(questions)
|
|
177
|
+
assert isinstance(more_answers, dict)
|
|
178
|
+
answers.update(more_answers)
|
|
155
179
|
|
|
156
180
|
def get_answer(key, default):
|
|
157
181
|
"""
|
|
@@ -177,7 +201,7 @@ class Initializer:
|
|
|
177
201
|
parameters_use_py = (parameters_format == c.PYTHON_FORMAT)
|
|
178
202
|
|
|
179
203
|
build_config_file = c.BUILD_FILE_STEM + ".yml"
|
|
180
|
-
build_format = get_answer(BUILD, c.
|
|
204
|
+
build_format = get_answer(BUILD, c.SQL_FILE_TYPE)
|
|
181
205
|
if build_format == c.SQL_FILE_TYPE:
|
|
182
206
|
build_file = c.BUILD_FILE_STEM + ".sql"
|
|
183
207
|
elif build_format == c.PYTHON_FILE_TYPE:
|
|
@@ -202,7 +226,7 @@ class Initializer:
|
|
|
202
226
|
self._copy_dotenv_files(admin_password)
|
|
203
227
|
self._create_manifest_file(connections_use_yaml, parameters_use_yaml)
|
|
204
228
|
|
|
205
|
-
self.
|
|
229
|
+
self._copy_gitignore_file()
|
|
206
230
|
|
|
207
231
|
if connections_use_py:
|
|
208
232
|
self._copy_pyconfig_file(c.CONNECTIONS_FILE)
|
|
@@ -253,7 +277,7 @@ class Initializer:
|
|
|
253
277
|
print(f"You may also run `sqrl get-file {c.GITIGNORE_FILE}` to add a sample {c.GITIGNORE_FILE} file to your project.")
|
|
254
278
|
print()
|
|
255
279
|
elif args.file_name == c.GITIGNORE_FILE:
|
|
256
|
-
self.
|
|
280
|
+
self._copy_gitignore_file()
|
|
257
281
|
elif args.file_name == c.MANIFEST_FILE:
|
|
258
282
|
self._create_manifest_file(not args.no_connections, args.parameters)
|
|
259
283
|
elif args.file_name in (c.USER_FILE, c.CONNECTIONS_FILE, c.PARAMETERS_FILE, c.CONTEXT_FILE):
|
squirrels/_manifest.py
CHANGED
|
@@ -39,7 +39,7 @@ class _ConfigWithNameBaseModel(BaseModel):
|
|
|
39
39
|
name: str
|
|
40
40
|
|
|
41
41
|
|
|
42
|
-
class
|
|
42
|
+
class ConnectionTypeEnum(Enum):
|
|
43
43
|
SQLALCHEMY = "sqlalchemy"
|
|
44
44
|
CONNECTORX = "connectorx"
|
|
45
45
|
ADBC = "adbc"
|
|
@@ -54,7 +54,7 @@ class ConnectionProperties(BaseModel):
|
|
|
54
54
|
uri: The URI for the connection
|
|
55
55
|
"""
|
|
56
56
|
label: str | None = None
|
|
57
|
-
type:
|
|
57
|
+
type: ConnectionTypeEnum = Field(default=ConnectionTypeEnum.SQLALCHEMY)
|
|
58
58
|
uri: str
|
|
59
59
|
sa_create_engine_args: dict[str, Any] = Field(default_factory=dict)
|
|
60
60
|
|
|
@@ -64,14 +64,14 @@ class ConnectionProperties(BaseModel):
|
|
|
64
64
|
Creates and caches a SQLAlchemy engine if the connection type is sqlalchemy.
|
|
65
65
|
Returns None for other connection types.
|
|
66
66
|
"""
|
|
67
|
-
if self.type ==
|
|
67
|
+
if self.type == ConnectionTypeEnum.SQLALCHEMY:
|
|
68
68
|
return create_engine(self.uri, **self.sa_create_engine_args)
|
|
69
69
|
else:
|
|
70
70
|
raise ValueError(f'Connection type "{self.type}" does not support engine property')
|
|
71
71
|
|
|
72
72
|
@cached_property
|
|
73
73
|
def dialect(self) -> str:
|
|
74
|
-
if self.type ==
|
|
74
|
+
if self.type == ConnectionTypeEnum.SQLALCHEMY:
|
|
75
75
|
dialect = self.engine.dialect.name
|
|
76
76
|
else:
|
|
77
77
|
url = urlparse(self.uri)
|
|
@@ -83,7 +83,7 @@ class ConnectionProperties(BaseModel):
|
|
|
83
83
|
|
|
84
84
|
@cached_property
|
|
85
85
|
def attach_uri_for_duckdb(self) -> str | None:
|
|
86
|
-
if self.type ==
|
|
86
|
+
if self.type == ConnectionTypeEnum.SQLALCHEMY:
|
|
87
87
|
url = self.engine.url
|
|
88
88
|
host = url.host
|
|
89
89
|
port = url.port
|
squirrels/_model_builder.py
CHANGED
|
@@ -79,6 +79,8 @@ class ModelBuilder:
|
|
|
79
79
|
duckdb_stg_path.replace(duckdb_dev_path)
|
|
80
80
|
elif duckdb_path.exists():
|
|
81
81
|
shutil.copy(duckdb_path, duckdb_dev_path)
|
|
82
|
+
else:
|
|
83
|
+
duckdb_dev_path.unlink(missing_ok=True) # delete any lingering development copy to create a fresh one later
|
|
82
84
|
|
|
83
85
|
self._logger.log_activity_time("creating development copy of virtual data environment", start)
|
|
84
86
|
|
squirrels/_model_configs.py
CHANGED
|
@@ -47,7 +47,7 @@ class QueryModelConfig(ModelConfig):
|
|
|
47
47
|
|
|
48
48
|
|
|
49
49
|
class BuildModelConfig(QueryModelConfig):
|
|
50
|
-
materialization: str = Field(default="
|
|
50
|
+
materialization: str = Field(default="VIEW", description="The materialization of the model (ignored if Python model which is always a table)")
|
|
51
51
|
|
|
52
52
|
def get_sql_for_build(self, model_name: str, select_query: str) -> str:
|
|
53
53
|
if self.materialization.upper() == "TABLE":
|
|
@@ -57,7 +57,7 @@ class BuildModelConfig(QueryModelConfig):
|
|
|
57
57
|
else:
|
|
58
58
|
raise ValueError(f"Invalid materialization: {self.materialization}")
|
|
59
59
|
|
|
60
|
-
create_prefix = f"CREATE OR REPLACE {materialization} {model_name} AS\n"
|
|
60
|
+
create_prefix = f"CREATE OR REPLACE {materialization} {model_name} AS\n\n"
|
|
61
61
|
return create_prefix + select_query
|
|
62
62
|
|
|
63
63
|
|
|
@@ -70,5 +70,5 @@ class FederateModelConfig(QueryModelConfig):
|
|
|
70
70
|
|
|
71
71
|
def get_sql_for_create(self, model_name: str, select_query: str) -> str:
|
|
72
72
|
materialization = "TABLE" if self.eager else "VIEW"
|
|
73
|
-
create_prefix = f"CREATE {materialization} {model_name} AS\n"
|
|
73
|
+
create_prefix = f"CREATE {materialization} {model_name} AS\n\n"
|
|
74
74
|
return create_prefix + select_query
|
squirrels/_model_queries.py
CHANGED
|
@@ -3,7 +3,7 @@ from dataclasses import dataclass, field
|
|
|
3
3
|
from typing import Callable, Generic, TypeVar, Any
|
|
4
4
|
import polars as pl, pandas as pd
|
|
5
5
|
|
|
6
|
-
from .
|
|
6
|
+
from ._arguments._run_time_args import BuildModelArgs
|
|
7
7
|
from ._model_configs import ModelConfig
|
|
8
8
|
|
|
9
9
|
|
squirrels/_models.py
CHANGED
|
@@ -9,7 +9,7 @@ import polars as pl, pandas as pd, networkx as nx
|
|
|
9
9
|
|
|
10
10
|
from . import _constants as c, _utils as u, _py_module as pm, _model_queries as mq, _model_configs as mc, _sources as src, _api_response_models as arm
|
|
11
11
|
from ._exceptions import FileExecutionError, InvalidInputError
|
|
12
|
-
from .
|
|
12
|
+
from ._arguments._run_time_args import ContextArgs, ModelArgs, BuildModelArgs
|
|
13
13
|
from ._auth import BaseUser
|
|
14
14
|
from ._connection_set import ConnectionsArgs, ConnectionSet, ConnectionProperties
|
|
15
15
|
from ._manifest import DatasetConfig
|
|
@@ -253,8 +253,12 @@ class SourceModel(StaticModel):
|
|
|
253
253
|
connection_props = self.conn_set.get_connection(conn_name)
|
|
254
254
|
if isinstance(connection_props, ConnectionProperties):
|
|
255
255
|
dialect = connection_props.dialect
|
|
256
|
+
attach_uri = connection_props.attach_uri_for_duckdb
|
|
256
257
|
else:
|
|
257
|
-
raise u.ConfigurationError(f'Unable to use connection "{conn_name}" for source "{self.name}"')
|
|
258
|
+
raise u.ConfigurationError(f'Unable to use connection "{conn_name}" for source "{self.name}". Connection "{conn_name}" must be a ConnectionProperties object')
|
|
259
|
+
|
|
260
|
+
if attach_uri is None:
|
|
261
|
+
raise u.ConfigurationError(f'Loading to duckdb is not supported for source "{self.name}" since its connection "{conn_name}" uses an unsupported dialect')
|
|
258
262
|
|
|
259
263
|
result = u.run_duckdb_stmt(self.logger, local_conn, f"FROM (SHOW DATABASES) WHERE database_name = 'db_{conn_name}'").fetchone()
|
|
260
264
|
if result is None:
|
|
@@ -351,7 +355,7 @@ class QueryModel(DataModel):
|
|
|
351
355
|
def _get_compile_sql_model_args_from_ctx_args(
|
|
352
356
|
self, ctx: dict[str, Any], ctx_args: ContextArgs
|
|
353
357
|
) -> dict[str, Any]:
|
|
354
|
-
is_placeholder = lambda placeholder: placeholder in ctx_args.
|
|
358
|
+
is_placeholder = lambda placeholder: placeholder in ctx_args._placeholders_copy
|
|
355
359
|
kwargs = {
|
|
356
360
|
"proj_vars": ctx_args.proj_vars, "env_vars": ctx_args.env_vars, "user": ctx_args.user, "prms": ctx_args.prms,
|
|
357
361
|
"traits": ctx_args.traits, "ctx": ctx, "is_placeholder": is_placeholder, "set_placeholder": ctx_args.set_placeholder,
|
|
@@ -424,6 +428,11 @@ class QueryModel(DataModel):
|
|
|
424
428
|
dependent_model_names.add(self.name)
|
|
425
429
|
for dep_model in self.upstreams.values():
|
|
426
430
|
dep_model.retrieve_dependent_query_models(dependent_model_names)
|
|
431
|
+
|
|
432
|
+
def _log_sql_to_run(self, sql: str, placeholders: dict[str, Any]) -> None:
|
|
433
|
+
log_msg = f"SQL to run for model '{self.name}':\n{sql}"
|
|
434
|
+
log_msg += f"\n\n(with placeholders: {placeholders})"
|
|
435
|
+
self.logger.info(log_msg)
|
|
427
436
|
|
|
428
437
|
|
|
429
438
|
@dataclass
|
|
@@ -460,6 +469,7 @@ class DbviewModel(QueryModel):
|
|
|
460
469
|
return "{{ source(\"" + source_name + "\") }}"
|
|
461
470
|
|
|
462
471
|
kwargs["source"] = source
|
|
472
|
+
kwargs["ref"] = source
|
|
463
473
|
return kwargs
|
|
464
474
|
|
|
465
475
|
def _get_duckdb_query(self, read_dialect: str, query: str) -> str:
|
|
@@ -476,15 +486,15 @@ class DbviewModel(QueryModel):
|
|
|
476
486
|
connection_props = self.conn_set.get_connection(connection_name)
|
|
477
487
|
|
|
478
488
|
if self.model_config.translate_to_duckdb and isinstance(connection_props, ConnectionProperties):
|
|
479
|
-
macros = {
|
|
480
|
-
"source": lambda source_name: "venv." + source_name
|
|
489
|
+
macros = {
|
|
490
|
+
"source": lambda source_name: "venv." + source_name
|
|
481
491
|
}
|
|
482
492
|
compiled_query2 = self._get_compiled_sql_query_str(compiled_query_str, macros)
|
|
483
493
|
compiled_query_str = self._get_duckdb_query(connection_props.dialect, compiled_query2)
|
|
484
494
|
is_duckdb = True
|
|
485
495
|
else:
|
|
486
|
-
macros = {
|
|
487
|
-
"source": lambda source_name: self.sources[source_name].get_table()
|
|
496
|
+
macros = {
|
|
497
|
+
"source": lambda source_name: self.sources[source_name].get_table()
|
|
488
498
|
}
|
|
489
499
|
compiled_query_str = self._get_compiled_sql_query_str(compiled_query_str, macros)
|
|
490
500
|
is_duckdb = False
|
|
@@ -518,7 +528,7 @@ class DbviewModel(QueryModel):
|
|
|
518
528
|
if is_duckdb:
|
|
519
529
|
local_conn = conn.cursor()
|
|
520
530
|
try:
|
|
521
|
-
self.logger.info(f"Running
|
|
531
|
+
self.logger.info(f"Running dbview '{self.name}' on duckdb")
|
|
522
532
|
return local_conn.sql(query, params=placeholders).pl()
|
|
523
533
|
except duckdb.CatalogException as e:
|
|
524
534
|
raise InvalidInputError(61, f'Model "{self.name}" depends on static data models that cannot be found.')
|
|
@@ -527,10 +537,12 @@ class DbviewModel(QueryModel):
|
|
|
527
537
|
finally:
|
|
528
538
|
local_conn.close()
|
|
529
539
|
else:
|
|
530
|
-
|
|
540
|
+
self.logger.info(f"Running dbview '{self.name}' on connection: {connection_name}")
|
|
541
|
+
return self.conn_set.run_sql_query_from_conn_name(query, connection_name, placeholders)
|
|
531
542
|
except RuntimeError as e:
|
|
532
543
|
raise FileExecutionError(f'Failed to run dbview sql model "{self.name}"', e)
|
|
533
544
|
|
|
545
|
+
self._log_sql_to_run(query, placeholders)
|
|
534
546
|
result = await asyncio.to_thread(run_sql_query_on_connection, is_duckdb, query, placeholders)
|
|
535
547
|
self.result = result.lazy()
|
|
536
548
|
|
|
@@ -580,7 +592,7 @@ class FederateModel(QueryModel):
|
|
|
580
592
|
connections = self.conn_set.get_connections_as_dict()
|
|
581
593
|
|
|
582
594
|
def run_external_sql(connection_name: str, sql_query: str) -> pl.DataFrame:
|
|
583
|
-
return self._run_sql_query_on_connection(connection_name, sql_query, ctx_args.
|
|
595
|
+
return self._run_sql_query_on_connection(connection_name, sql_query, ctx_args._placeholders_copy)
|
|
584
596
|
|
|
585
597
|
conn_args = ConnectionsArgs(ctx_args.project_path, ctx_args.proj_vars, ctx_args.env_vars)
|
|
586
598
|
build_model_args = BuildModelArgs(conn_args, connections, dependencies, self._ref_for_python, run_external_sql)
|
|
@@ -636,10 +648,12 @@ class FederateModel(QueryModel):
|
|
|
636
648
|
query = compiled_query.query
|
|
637
649
|
|
|
638
650
|
def create_table(local_conn: duckdb.DuckDBPyConnection):
|
|
639
|
-
|
|
640
|
-
|
|
651
|
+
# DuckDB doesn't support specifying named parameters that are not used in the query, so filtering them out
|
|
652
|
+
placeholder_exists = lambda key: re.search(r"\$" + key + r"(?!\w)", query)
|
|
653
|
+
existing_placeholders = {key: value for key, value in placeholders.items() if placeholder_exists(key)}
|
|
641
654
|
|
|
642
655
|
create_query = self.model_config.get_sql_for_create(self.name, query)
|
|
656
|
+
self._log_sql_to_run(create_query, existing_placeholders)
|
|
643
657
|
try:
|
|
644
658
|
return local_conn.execute(create_query, existing_placeholders)
|
|
645
659
|
except duckdb.CatalogException as e:
|
|
@@ -772,7 +786,7 @@ class BuildModel(StaticModel, QueryModel):
|
|
|
772
786
|
create_query = self.model_config.get_sql_for_build(self.name, query)
|
|
773
787
|
local_conn = conn.cursor()
|
|
774
788
|
try:
|
|
775
|
-
return u.run_duckdb_stmt(self.logger, local_conn, create_query)
|
|
789
|
+
return u.run_duckdb_stmt(self.logger, local_conn, create_query, model_name=self.name)
|
|
776
790
|
except Exception as e:
|
|
777
791
|
raise FileExecutionError(f'Failed to build static sql model "{self.name}"', e) from e
|
|
778
792
|
finally:
|
|
@@ -920,7 +934,7 @@ class DAG:
|
|
|
920
934
|
|
|
921
935
|
self._compile_models(context, ctx_args, recurse)
|
|
922
936
|
|
|
923
|
-
self.placeholders = ctx_args.
|
|
937
|
+
self.placeholders = ctx_args._placeholders_copy
|
|
924
938
|
if runquery:
|
|
925
939
|
await self._run_models()
|
|
926
940
|
|
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
from squirrels import
|
|
1
|
+
from squirrels import arguments as args, dashboards as d
|
|
2
2
|
from matplotlib import pyplot as plt, figure as f, axes as a
|
|
3
3
|
|
|
4
4
|
|
|
5
|
-
async def main(sqrl: DashboardArgs) -> d.PngDashboard:
|
|
6
|
-
spending_by_month_df = await sqrl.dataset("federate_dataset_example", fixed_parameters={"group_by": "
|
|
7
|
-
spending_by_subcategory_df = await sqrl.dataset("federate_dataset_example", fixed_parameters={"group_by": "
|
|
5
|
+
async def main(sqrl: args.DashboardArgs) -> d.PngDashboard:
|
|
6
|
+
spending_by_month_df = await sqrl.dataset("federate_dataset_example", fixed_parameters={"group_by": "month"})
|
|
7
|
+
spending_by_subcategory_df = await sqrl.dataset("federate_dataset_example", fixed_parameters={"group_by": "subcat"})
|
|
8
8
|
|
|
9
9
|
# Create a figure with two subplots
|
|
10
10
|
fig, (ax0, ax1) = plt.subplots(2, 1, figsize=(8, 8), height_ratios=(1, 2))
|
|
@@ -14,9 +14,9 @@ depends_on:
|
|
|
14
14
|
- name: dataset_example_month
|
|
15
15
|
dataset: federate_dataset_example
|
|
16
16
|
fixed_parameters:
|
|
17
|
-
- group_by:
|
|
17
|
+
- group_by: month (Month)
|
|
18
18
|
|
|
19
19
|
- name: dataset_example_subcategory
|
|
20
20
|
dataset: federate_dataset_example
|
|
21
21
|
fixed_parameters:
|
|
22
|
-
- group_by:
|
|
22
|
+
- group_by: subcat (Subcategory)
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
{%- macro date_and_amount_filters(use_from_range) -%}
|
|
2
|
+
{%- if use_from_range -%}
|
|
3
|
+
|
|
4
|
+
date >= {{ ctx.start_date_from_range | quote }}
|
|
5
|
+
AND date <= {{ ctx.end_date_from_range | quote }}
|
|
6
|
+
AND amount >= {{ ctx.min_amount_from_range }}
|
|
7
|
+
AND amount <= {{ ctx.max_amount_from_range }}
|
|
8
|
+
|
|
9
|
+
{%- else -%}
|
|
10
|
+
|
|
11
|
+
date >= {{ ctx.start_date | quote }}
|
|
12
|
+
AND date <= {{ ctx.end_date | quote }}
|
|
13
|
+
AND amount >= {{ ctx.min_amount }}
|
|
14
|
+
AND amount <= {{ ctx.max_amount }}
|
|
15
|
+
|
|
16
|
+
{%- endif -%}
|
|
17
|
+
{%- endmacro -%}
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
from squirrels import
|
|
1
|
+
from squirrels import arguments as args
|
|
2
2
|
import polars as pl, pandas as pd
|
|
3
3
|
|
|
4
4
|
|
|
5
|
-
def main(sqrl: BuildModelArgs) -> pl.LazyFrame | pl.DataFrame | pd.DataFrame:
|
|
5
|
+
def main(sqrl: args.BuildModelArgs) -> pl.LazyFrame | pl.DataFrame | pd.DataFrame:
|
|
6
6
|
"""
|
|
7
7
|
Create a build model by joining/processing sources or other build models to form a new
|
|
8
8
|
Python DataFrame (using polars LazyFrame, polars DataFrame, or pandas DataFrame).
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
description: |
|
|
2
2
|
This is an example of a build model. It adds a new column called "month" to the source table "src_transactions".
|
|
3
3
|
|
|
4
|
+
materialization: TABLE # optional - defaults to "VIEW" for SQL models, ignored and always a "TABLE" for Python models
|
|
5
|
+
|
|
4
6
|
depends_on: # optional for SQL models - the "ref" macro also adds to this set
|
|
5
7
|
- src_transactions
|
|
6
8
|
- seed_categories
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
from squirrels import arguments as args
|
|
2
|
+
import polars as pl, pandas as pd
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def main(sqrl: args.ModelArgs) -> pl.LazyFrame | pl.DataFrame | pd.DataFrame:
|
|
6
|
+
"""
|
|
7
|
+
Create federated models by joining/processing dependent models (sources, seeds, builds, dbviews, other federates, etc.) to
|
|
8
|
+
form a new Python DataFrame (using polars LazyFrame, polars DataFrame, or pandas DataFrame).
|
|
9
|
+
"""
|
|
10
|
+
df = sqrl.ref("build_example")
|
|
11
|
+
|
|
12
|
+
df = df.filter(
|
|
13
|
+
(pl.col("date") >= sqrl.ctx["start_date_from_range"]) &
|
|
14
|
+
(pl.col("date") <= sqrl.ctx["end_date_from_range"]) &
|
|
15
|
+
(pl.col("amount") >= sqrl.ctx["min_amount_from_range"]) &
|
|
16
|
+
(pl.col("amount") <= sqrl.ctx["max_amount_from_range"])
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
if sqrl.ctx["has_categories"]:
|
|
20
|
+
categories: list[str] = sqrl.ctx["categories"]
|
|
21
|
+
df = df.filter(pl.col("category_id").is_in(categories))
|
|
22
|
+
|
|
23
|
+
if sqrl.ctx["has_subcategories"]:
|
|
24
|
+
subcategories: list[str] = sqrl.ctx["subcategories"]
|
|
25
|
+
df = df.filter(pl.col("subcategory_id").is_in(subcategories))
|
|
26
|
+
|
|
27
|
+
dimension_cols: list[str] = sqrl.ctx["group_by_cols"]
|
|
28
|
+
df = df.group_by(dimension_cols).agg(
|
|
29
|
+
pl.sum("amount").cast(pl.Decimal(precision=15, scale=2)).alias("total_amount")
|
|
30
|
+
)
|
|
31
|
+
df = df.rename(sqrl.ctx["rename_dict"])
|
|
32
|
+
|
|
33
|
+
order_by_cols: list[str] = sqrl.ctx["order_by_cols"]
|
|
34
|
+
df = df.select(*order_by_cols, "total_amount") \
|
|
35
|
+
.sort(order_by_cols, descending=True)
|
|
36
|
+
|
|
37
|
+
if "limit" in sqrl.ctx:
|
|
38
|
+
limit: int = sqrl.ctx["limit"]
|
|
39
|
+
df = df.limit(limit)
|
|
40
|
+
|
|
41
|
+
return df
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
{#- DuckDB dialect -#}
|
|
2
|
+
|
|
3
|
+
SELECT {{ ctx.select_dim_cols | join }}
|
|
4
|
+
, CAST(SUM(amount) AS DECIMAL(15, 2)) as total_amount
|
|
5
|
+
|
|
6
|
+
{# ref() can be used on a sources, seeds, builds, dbviews, or other federate models -#}
|
|
7
|
+
FROM {{ ref("build_example") }} AS a
|
|
8
|
+
|
|
9
|
+
WHERE {{ date_and_amount_filters(use_from_range=true) }}
|
|
10
|
+
{%- if ctx.has_categories %}
|
|
11
|
+
AND category_id IN ({{ ctx.categories | quote_and_join }})
|
|
12
|
+
{%- endif %}
|
|
13
|
+
{%- if ctx.has_subcategories %}
|
|
14
|
+
AND subcategory_id IN ({{ ctx.subcategories | quote_and_join }})
|
|
15
|
+
{%- endif %}
|
|
16
|
+
|
|
17
|
+
GROUP BY {{ ctx.group_by_cols | join }}
|
|
18
|
+
|
|
19
|
+
ORDER BY {{ ctx.order_by_cols_desc | join }}
|
|
20
|
+
|
|
21
|
+
{%- if ctx.limit %}
|
|
22
|
+
|
|
23
|
+
LIMIT {{ ctx.limit }}
|
|
24
|
+
|
|
25
|
+
{%- endif %}
|