squirrels 0.5.0b1__py3-none-any.whl → 0.5.0b3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. dateutils/__init__.py +6 -0
  2. dateutils/_enums.py +25 -0
  3. squirrels/dateutils.py → dateutils/_implementation.py +58 -111
  4. dateutils/types.py +6 -0
  5. squirrels/__init__.py +7 -13
  6. squirrels/_api_server.py +5 -5
  7. squirrels/{arguments/init_time_args.py → _arguments/_init_time_args.py} +2 -2
  8. squirrels/{arguments/run_time_args.py → _arguments/_run_time_args.py} +4 -26
  9. squirrels/_auth.py +2 -2
  10. squirrels/_command_line.py +13 -9
  11. squirrels/_connection_set.py +5 -5
  12. squirrels/_constants.py +1 -1
  13. squirrels/_dashboard_types.py +82 -0
  14. squirrels/_dashboards_io.py +2 -2
  15. squirrels/_data_sources.py +564 -0
  16. squirrels/_exceptions.py +1 -1
  17. squirrels/_initializer.py +82 -58
  18. squirrels/_manifest.py +5 -5
  19. squirrels/_model_builder.py +2 -0
  20. squirrels/_model_configs.py +3 -3
  21. squirrels/_model_queries.py +1 -1
  22. squirrels/_models.py +28 -14
  23. squirrels/{package_data → _package_data}/base_project/dashboards/dashboard_example.py +4 -4
  24. squirrels/{package_data → _package_data}/base_project/dashboards/dashboard_example.yml +2 -2
  25. squirrels/_package_data/base_project/macros/macros_example.sql +17 -0
  26. squirrels/{package_data → _package_data}/base_project/models/builds/build_example.py +2 -2
  27. squirrels/{package_data → _package_data}/base_project/models/builds/build_example.sql +1 -1
  28. squirrels/{package_data → _package_data}/base_project/models/builds/build_example.yml +2 -0
  29. squirrels/{package_data → _package_data}/base_project/models/dbviews/dbview_example.sql +1 -1
  30. squirrels/_package_data/base_project/models/federates/federate_example.py +41 -0
  31. squirrels/_package_data/base_project/models/federates/federate_example.sql +25 -0
  32. squirrels/{package_data → _package_data}/base_project/models/federates/federate_example.yml +6 -6
  33. squirrels/{package_data → _package_data}/base_project/parameters.yml +9 -8
  34. squirrels/_package_data/base_project/pyconfigs/connections.py +14 -0
  35. squirrels/{package_data → _package_data}/base_project/pyconfigs/context.py +14 -16
  36. squirrels/{package_data → _package_data}/base_project/pyconfigs/parameters.py +13 -8
  37. squirrels/{package_data → _package_data}/base_project/pyconfigs/user.py +2 -2
  38. squirrels/_parameter_configs.py +34 -34
  39. squirrels/_parameter_options.py +348 -0
  40. squirrels/_parameter_sets.py +18 -18
  41. squirrels/_parameters.py +1266 -0
  42. squirrels/_project.py +37 -12
  43. squirrels/_utils.py +5 -3
  44. squirrels/arguments.py +2 -0
  45. squirrels/connections.py +1 -0
  46. squirrels/dashboards.py +1 -82
  47. squirrels/data_sources.py +8 -563
  48. squirrels/parameter_options.py +8 -348
  49. squirrels/parameters.py +9 -1266
  50. squirrels/types.py +11 -0
  51. {squirrels-0.5.0b1.dist-info → squirrels-0.5.0b3.dist-info}/METADATA +11 -17
  52. squirrels-0.5.0b3.dist-info/RECORD +80 -0
  53. squirrels/package_data/base_project/macros/macros_example.sql +0 -15
  54. squirrels/package_data/base_project/models/federates/federate_example.py +0 -44
  55. squirrels/package_data/base_project/models/federates/federate_example.sql +0 -17
  56. squirrels/package_data/base_project/pyconfigs/connections.py +0 -14
  57. squirrels-0.5.0b1.dist-info/RECORD +0 -70
  58. /squirrels/{dataset_result.py → _dataset_types.py} +0 -0
  59. /squirrels/{package_data → _package_data}/base_project/.env +0 -0
  60. /squirrels/{package_data → _package_data}/base_project/.env.example +0 -0
  61. /squirrels/{package_data → _package_data}/base_project/assets/expenses.db +0 -0
  62. /squirrels/{package_data → _package_data}/base_project/assets/weather.db +0 -0
  63. /squirrels/{package_data → _package_data}/base_project/connections.yml +0 -0
  64. /squirrels/{package_data → _package_data}/base_project/docker/.dockerignore +0 -0
  65. /squirrels/{package_data → _package_data}/base_project/docker/Dockerfile +0 -0
  66. /squirrels/{package_data → _package_data}/base_project/docker/compose.yml +0 -0
  67. /squirrels/{package_data → _package_data}/base_project/duckdb_init.sql +0 -0
  68. /squirrels/{package_data/base_project/.gitignore → _package_data/base_project/gitignore} +0 -0
  69. /squirrels/{package_data → _package_data}/base_project/models/dbviews/dbview_example.yml +0 -0
  70. /squirrels/{package_data → _package_data}/base_project/models/sources.yml +0 -0
  71. /squirrels/{package_data → _package_data}/base_project/seeds/seed_categories.csv +0 -0
  72. /squirrels/{package_data → _package_data}/base_project/seeds/seed_categories.yml +0 -0
  73. /squirrels/{package_data → _package_data}/base_project/seeds/seed_subcategories.csv +0 -0
  74. /squirrels/{package_data → _package_data}/base_project/seeds/seed_subcategories.yml +0 -0
  75. /squirrels/{package_data → _package_data}/base_project/squirrels.yml.j2 +0 -0
  76. /squirrels/{package_data → _package_data}/base_project/tmp/.gitignore +0 -0
  77. {squirrels-0.5.0b1.dist-info → squirrels-0.5.0b3.dist-info}/WHEEL +0 -0
  78. {squirrels-0.5.0b1.dist-info → squirrels-0.5.0b3.dist-info}/entry_points.txt +0 -0
  79. {squirrels-0.5.0b1.dist-info → squirrels-0.5.0b3.dist-info}/licenses/LICENSE +0 -0
squirrels/_initializer.py CHANGED
@@ -1,35 +1,37 @@
1
1
  from typing import Optional
2
2
  from datetime import datetime
3
+ from pathlib import Path
3
4
  import inquirer, os, shutil, secrets
4
5
 
5
6
  from . import _constants as c, _utils as u
6
7
 
7
- base_proj_dir = u.Path(os.path.dirname(__file__), c.PACKAGE_DATA_FOLDER, c.BASE_PROJECT_FOLDER)
8
+ base_proj_dir = Path(os.path.dirname(__file__), c.PACKAGE_DATA_FOLDER, c.BASE_PROJECT_FOLDER)
8
9
 
9
10
  TMP_FOLDER = "tmp"
10
11
 
11
12
 
12
13
  class Initializer:
13
- def __init__(self, *, project_name: Optional[str] = None, overwrite: bool = False):
14
- self.project_name = project_name
15
- self.overwrite = overwrite
14
+ def __init__(self, *, project_name: Optional[str] = None, use_curr_dir: bool = False):
15
+ self.project_name = project_name if not use_curr_dir else None
16
+ self.use_curr_dir = use_curr_dir
16
17
 
17
- def _path_exists(self, filepath: u.Path) -> bool:
18
+ def _path_exists(self, filepath: Path) -> bool:
18
19
  return os.path.exists(filepath)
19
20
 
20
- def _files_have_same_content(self, file1: u.Path, file2: u.Path) -> bool:
21
+ def _files_have_same_content(self, file1: Path, file2: Path) -> bool:
21
22
  with open(file1, 'rb') as f1, open(file2, 'rb') as f2:
22
23
  return f1.read() == f2.read()
23
24
 
24
- def _add_timestamp_to_filename(self, path: u.Path) -> u.Path:
25
+ def _add_timestamp_to_filename(self, path: Path) -> Path:
25
26
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
26
27
  new_filename = f"{path.stem}_{timestamp}{path.suffix}"
27
28
  return path.with_name(new_filename)
28
29
 
29
- def _copy_file(self, filepath: u.Path, *, src_folder: str = ""):
30
- src_path = u.Path(base_proj_dir, src_folder, filepath)
30
+ def _copy_file(self, filepath: Path, *, src_folder: str = "", src_file: Path | None = None):
31
+ src_file = src_file if src_file is not None else filepath
32
+ src_path = Path(base_proj_dir, src_folder, src_file)
31
33
 
32
- filepath2 = u.Path(self.project_name, filepath) if self.project_name else filepath
34
+ filepath2 = Path(self.project_name, filepath) if self.project_name else filepath
33
35
  dest_dir = os.path.dirname(filepath2)
34
36
  if dest_dir != "":
35
37
  os.makedirs(dest_dir, exist_ok=True)
@@ -40,8 +42,6 @@ class Initializer:
40
42
  if self._files_have_same_content(src_path, filepath2):
41
43
  perform_copy = False
42
44
  extra_msg = "Skipping... file contents is same as source"
43
- elif self.overwrite:
44
- extra_msg = "Overwriting file..."
45
45
  else:
46
46
  filepath2 = self._add_timestamp_to_filename(old_filepath)
47
47
  extra_msg = f'Creating file as "{filepath2}" instead...'
@@ -53,38 +53,38 @@ class Initializer:
53
53
  shutil.copy(src_path, filepath2)
54
54
 
55
55
  def _copy_macros_file(self, filepath: str):
56
- self._copy_file(u.Path(c.MACROS_FOLDER, filepath))
56
+ self._copy_file(Path(c.MACROS_FOLDER, filepath))
57
57
 
58
58
  def _copy_models_file(self, filepath: str):
59
- self._copy_file(u.Path(c.MODELS_FOLDER, filepath))
59
+ self._copy_file(Path(c.MODELS_FOLDER, filepath))
60
60
 
61
61
  def _copy_build_file(self, filepath: str):
62
- self._copy_file(u.Path(c.MODELS_FOLDER, c.BUILDS_FOLDER, filepath))
62
+ self._copy_file(Path(c.MODELS_FOLDER, c.BUILDS_FOLDER, filepath))
63
63
 
64
64
  def _copy_dbview_file(self, filepath: str):
65
- self._copy_file(u.Path(c.MODELS_FOLDER, c.DBVIEWS_FOLDER, filepath))
65
+ self._copy_file(Path(c.MODELS_FOLDER, c.DBVIEWS_FOLDER, filepath))
66
66
 
67
67
  def _copy_federate_file(self, filepath: str):
68
- self._copy_file(u.Path(c.MODELS_FOLDER, c.FEDERATES_FOLDER, filepath))
68
+ self._copy_file(Path(c.MODELS_FOLDER, c.FEDERATES_FOLDER, filepath))
69
69
 
70
70
  def _copy_database_file(self, filepath: str):
71
- self._copy_file(u.Path(c.DATABASE_FOLDER, filepath))
71
+ self._copy_file(Path(c.DATABASE_FOLDER, filepath))
72
72
 
73
73
  def _copy_pyconfig_file(self, filepath: str):
74
- self._copy_file(u.Path(c.PYCONFIGS_FOLDER, filepath))
74
+ self._copy_file(Path(c.PYCONFIGS_FOLDER, filepath))
75
75
 
76
76
  def _copy_seed_file(self, filepath: str):
77
- self._copy_file(u.Path(c.SEEDS_FOLDER, filepath))
77
+ self._copy_file(Path(c.SEEDS_FOLDER, filepath))
78
78
 
79
79
  def _copy_dashboard_file(self, filepath: str):
80
- self._copy_file(u.Path(c.DASHBOARDS_FOLDER, filepath))
80
+ self._copy_file(Path(c.DASHBOARDS_FOLDER, filepath))
81
81
 
82
82
  def _create_manifest_file(self, has_connections: bool, has_parameters: bool):
83
83
  def get_content(file_name: Optional[str]) -> str:
84
84
  if file_name is None:
85
85
  return ""
86
86
 
87
- yaml_path = u.Path(base_proj_dir, file_name)
87
+ yaml_path = Path(base_proj_dir, file_name)
88
88
  return yaml_path.read_text()
89
89
 
90
90
  file_name_dict = {
@@ -95,10 +95,10 @@ class Initializer:
95
95
 
96
96
  manifest_template = get_content(c.MANIFEST_JINJA_FILE)
97
97
  manifest_content = u.render_string(manifest_template, **substitutions)
98
- output_path = u.Path(base_proj_dir, TMP_FOLDER, c.MANIFEST_FILE)
98
+ output_path = Path(base_proj_dir, TMP_FOLDER, c.MANIFEST_FILE)
99
99
  output_path.write_text(manifest_content)
100
100
 
101
- self._copy_file(u.Path(c.MANIFEST_FILE), src_folder=TMP_FOLDER)
101
+ self._copy_file(Path(c.MANIFEST_FILE), src_folder=TMP_FOLDER)
102
102
 
103
103
  def _copy_dotenv_files(self, admin_password: str | None = None):
104
104
  substitutions = {
@@ -106,52 +106,76 @@ class Initializer:
106
106
  "random_admin_password": admin_password if admin_password else secrets.token_urlsafe(8),
107
107
  }
108
108
 
109
- dotenv_path = u.Path(base_proj_dir, c.DOTENV_FILE)
109
+ dotenv_path = Path(base_proj_dir, c.DOTENV_FILE)
110
110
  contents = u.render_string(dotenv_path.read_text(), **substitutions)
111
111
 
112
- output_path = u.Path(base_proj_dir, TMP_FOLDER, c.DOTENV_FILE)
112
+ output_path = Path(base_proj_dir, TMP_FOLDER, c.DOTENV_FILE)
113
113
  output_path.write_text(contents)
114
114
 
115
- self._copy_file(u.Path(c.DOTENV_FILE), src_folder=TMP_FOLDER)
116
- self._copy_file(u.Path(c.DOTENV_FILE + ".example"))
115
+ self._copy_file(Path(c.DOTENV_FILE), src_folder=TMP_FOLDER)
116
+ self._copy_file(Path(c.DOTENV_FILE + ".example"))
117
+
118
+ def _copy_gitignore_file(self):
119
+ self._copy_file(Path(c.GITIGNORE_FILE), src_file=Path("gitignore"))
117
120
 
118
121
  def init_project(self, args):
119
- options = ["core", "connections", "parameters", "build", "federate", "dashboard"]
120
- _, CONNECTIONS, PARAMETERS, BUILD, FEDERATE, DASHBOARD = options
122
+ options = ["connections", "parameters", "build", "federate", "dashboard", "admin_password"]
123
+ CONNECTIONS, PARAMETERS, BUILD, FEDERATE, DASHBOARD, ADMIN_PASSWORD = options
121
124
 
122
125
  # Add project name prompt if not provided
123
- if self.project_name is None:
126
+ if self.project_name is None and not args.curr_dir:
124
127
  questions = [
125
- inquirer.Text('project_name', message="What is your project name? (leave blank to create in current directory)")
128
+ inquirer.Text('project_name', message="What is your project folder name? (leave blank to create in current directory)")
126
129
  ]
127
130
  answers = inquirer.prompt(questions)
128
131
  assert isinstance(answers, dict)
129
132
  self.project_name = answers['project_name']
130
133
 
131
134
  answers = { x: getattr(args, x) for x in options }
132
- if not any(answers.values()):
133
- questions = [
134
- inquirer.List(
135
- CONNECTIONS, message=f"How would you like to configure the database connections?", choices=c.CONF_FORMAT_CHOICES
136
- ),
137
- inquirer.List(
138
- PARAMETERS, message=f"How would you like to configure the parameters?", choices=c.CONF_FORMAT_CHOICES2
139
- ),
140
- inquirer.List(
141
- BUILD, message="What's the file format for the build model?", choices=c.FILE_TYPE_CHOICES
142
- ),
143
- inquirer.List(
144
- FEDERATE, message="What's the file format for the federated model?", choices=c.FILE_TYPE_CHOICES
145
- ),
146
- inquirer.Confirm(
147
- DASHBOARD, message=f"Do you want to include a dashboard example?", default=False
148
- ),
149
- inquirer.Password(
150
- "admin_password", message="What's the admin password? (leave blank to generate a random one)"
151
- ),
152
- ]
153
- answers = inquirer.prompt(questions)
154
- assert isinstance(answers, dict)
135
+ if answers.get(DASHBOARD) is not None:
136
+ answers[DASHBOARD] = (answers[DASHBOARD] == 'y') # convert 'y' or 'n' to boolean
137
+
138
+ if not args.use_defaults:
139
+ questions = []
140
+ if answers.get(CONNECTIONS) is None:
141
+ questions.append(
142
+ inquirer.List(
143
+ CONNECTIONS, message=f"How would you like to configure the database connections?", choices=c.CONF_FORMAT_CHOICES
144
+ ),
145
+ )
146
+ if answers.get(PARAMETERS) is None:
147
+ questions.append(
148
+ inquirer.List(
149
+ PARAMETERS, message=f"How would you like to configure the parameters?", choices=c.CONF_FORMAT_CHOICES2
150
+ ),
151
+ )
152
+ if answers.get(BUILD) is None:
153
+ questions.append(
154
+ inquirer.List(
155
+ BUILD, message="What's the file format for the build model?", choices=c.FILE_TYPE_CHOICES
156
+ ),
157
+ )
158
+ if answers.get(FEDERATE) is None:
159
+ questions.append(
160
+ inquirer.List(
161
+ FEDERATE, message="What's the file format for the federated model?", choices=c.FILE_TYPE_CHOICES
162
+ ),
163
+ )
164
+ if answers.get(DASHBOARD) is None:
165
+ questions.append(
166
+ inquirer.Confirm(
167
+ DASHBOARD, message=f"Do you want to include a dashboard example?", default=False
168
+ ),
169
+ )
170
+ if answers.get(ADMIN_PASSWORD) is None:
171
+ questions.append(
172
+ inquirer.Password(
173
+ "admin_password", message="What's the admin password? (leave blank to generate a random one)"
174
+ ),
175
+ )
176
+ more_answers = inquirer.prompt(questions)
177
+ assert isinstance(more_answers, dict)
178
+ answers.update(more_answers)
155
179
 
156
180
  def get_answer(key, default):
157
181
  """
@@ -177,7 +201,7 @@ class Initializer:
177
201
  parameters_use_py = (parameters_format == c.PYTHON_FORMAT)
178
202
 
179
203
  build_config_file = c.BUILD_FILE_STEM + ".yml"
180
- build_format = get_answer(BUILD, c.PYTHON_FILE_TYPE)
204
+ build_format = get_answer(BUILD, c.SQL_FILE_TYPE)
181
205
  if build_format == c.SQL_FILE_TYPE:
182
206
  build_file = c.BUILD_FILE_STEM + ".sql"
183
207
  elif build_format == c.PYTHON_FILE_TYPE:
@@ -202,7 +226,7 @@ class Initializer:
202
226
  self._copy_dotenv_files(admin_password)
203
227
  self._create_manifest_file(connections_use_yaml, parameters_use_yaml)
204
228
 
205
- self._copy_file(u.Path(c.GITIGNORE_FILE))
229
+ self._copy_gitignore_file()
206
230
 
207
231
  if connections_use_py:
208
232
  self._copy_pyconfig_file(c.CONNECTIONS_FILE)
@@ -253,7 +277,7 @@ class Initializer:
253
277
  print(f"You may also run `sqrl get-file {c.GITIGNORE_FILE}` to add a sample {c.GITIGNORE_FILE} file to your project.")
254
278
  print()
255
279
  elif args.file_name == c.GITIGNORE_FILE:
256
- self._copy_file(u.Path(c.GITIGNORE_FILE))
280
+ self._copy_gitignore_file()
257
281
  elif args.file_name == c.MANIFEST_FILE:
258
282
  self._create_manifest_file(not args.no_connections, args.parameters)
259
283
  elif args.file_name in (c.USER_FILE, c.CONNECTIONS_FILE, c.PARAMETERS_FILE, c.CONTEXT_FILE):
squirrels/_manifest.py CHANGED
@@ -39,7 +39,7 @@ class _ConfigWithNameBaseModel(BaseModel):
39
39
  name: str
40
40
 
41
41
 
42
- class ConnectionType(Enum):
42
+ class ConnectionTypeEnum(Enum):
43
43
  SQLALCHEMY = "sqlalchemy"
44
44
  CONNECTORX = "connectorx"
45
45
  ADBC = "adbc"
@@ -54,7 +54,7 @@ class ConnectionProperties(BaseModel):
54
54
  uri: The URI for the connection
55
55
  """
56
56
  label: str | None = None
57
- type: ConnectionType = Field(default=ConnectionType.SQLALCHEMY)
57
+ type: ConnectionTypeEnum = Field(default=ConnectionTypeEnum.SQLALCHEMY)
58
58
  uri: str
59
59
  sa_create_engine_args: dict[str, Any] = Field(default_factory=dict)
60
60
 
@@ -64,14 +64,14 @@ class ConnectionProperties(BaseModel):
64
64
  Creates and caches a SQLAlchemy engine if the connection type is sqlalchemy.
65
65
  Returns None for other connection types.
66
66
  """
67
- if self.type == ConnectionType.SQLALCHEMY:
67
+ if self.type == ConnectionTypeEnum.SQLALCHEMY:
68
68
  return create_engine(self.uri, **self.sa_create_engine_args)
69
69
  else:
70
70
  raise ValueError(f'Connection type "{self.type}" does not support engine property')
71
71
 
72
72
  @cached_property
73
73
  def dialect(self) -> str:
74
- if self.type == ConnectionType.SQLALCHEMY:
74
+ if self.type == ConnectionTypeEnum.SQLALCHEMY:
75
75
  dialect = self.engine.dialect.name
76
76
  else:
77
77
  url = urlparse(self.uri)
@@ -83,7 +83,7 @@ class ConnectionProperties(BaseModel):
83
83
 
84
84
  @cached_property
85
85
  def attach_uri_for_duckdb(self) -> str | None:
86
- if self.type == ConnectionType.SQLALCHEMY:
86
+ if self.type == ConnectionTypeEnum.SQLALCHEMY:
87
87
  url = self.engine.url
88
88
  host = url.host
89
89
  port = url.port
@@ -79,6 +79,8 @@ class ModelBuilder:
79
79
  duckdb_stg_path.replace(duckdb_dev_path)
80
80
  elif duckdb_path.exists():
81
81
  shutil.copy(duckdb_path, duckdb_dev_path)
82
+ else:
83
+ duckdb_dev_path.unlink(missing_ok=True) # delete any lingering development copy to create a fresh one later
82
84
 
83
85
  self._logger.log_activity_time("creating development copy of virtual data environment", start)
84
86
 
@@ -47,7 +47,7 @@ class QueryModelConfig(ModelConfig):
47
47
 
48
48
 
49
49
  class BuildModelConfig(QueryModelConfig):
50
- materialization: str = Field(default="TABLE", description="The materialization of the model (ignored if Python model which is always a table)")
50
+ materialization: str = Field(default="VIEW", description="The materialization of the model (ignored if Python model which is always a table)")
51
51
 
52
52
  def get_sql_for_build(self, model_name: str, select_query: str) -> str:
53
53
  if self.materialization.upper() == "TABLE":
@@ -57,7 +57,7 @@ class BuildModelConfig(QueryModelConfig):
57
57
  else:
58
58
  raise ValueError(f"Invalid materialization: {self.materialization}")
59
59
 
60
- create_prefix = f"CREATE OR REPLACE {materialization} {model_name} AS\n"
60
+ create_prefix = f"CREATE OR REPLACE {materialization} {model_name} AS\n\n"
61
61
  return create_prefix + select_query
62
62
 
63
63
 
@@ -70,5 +70,5 @@ class FederateModelConfig(QueryModelConfig):
70
70
 
71
71
  def get_sql_for_create(self, model_name: str, select_query: str) -> str:
72
72
  materialization = "TABLE" if self.eager else "VIEW"
73
- create_prefix = f"CREATE {materialization} {model_name} AS\n"
73
+ create_prefix = f"CREATE {materialization} {model_name} AS\n\n"
74
74
  return create_prefix + select_query
@@ -3,7 +3,7 @@ from dataclasses import dataclass, field
3
3
  from typing import Callable, Generic, TypeVar, Any
4
4
  import polars as pl, pandas as pd
5
5
 
6
- from .arguments.run_time_args import BuildModelArgs
6
+ from ._arguments._run_time_args import BuildModelArgs
7
7
  from ._model_configs import ModelConfig
8
8
 
9
9
 
squirrels/_models.py CHANGED
@@ -9,7 +9,7 @@ import polars as pl, pandas as pd, networkx as nx
9
9
 
10
10
  from . import _constants as c, _utils as u, _py_module as pm, _model_queries as mq, _model_configs as mc, _sources as src, _api_response_models as arm
11
11
  from ._exceptions import FileExecutionError, InvalidInputError
12
- from .arguments.run_time_args import ContextArgs, ModelArgs, BuildModelArgs
12
+ from ._arguments._run_time_args import ContextArgs, ModelArgs, BuildModelArgs
13
13
  from ._auth import BaseUser
14
14
  from ._connection_set import ConnectionsArgs, ConnectionSet, ConnectionProperties
15
15
  from ._manifest import DatasetConfig
@@ -253,8 +253,12 @@ class SourceModel(StaticModel):
253
253
  connection_props = self.conn_set.get_connection(conn_name)
254
254
  if isinstance(connection_props, ConnectionProperties):
255
255
  dialect = connection_props.dialect
256
+ attach_uri = connection_props.attach_uri_for_duckdb
256
257
  else:
257
- raise u.ConfigurationError(f'Unable to use connection "{conn_name}" for source "{self.name}"')
258
+ raise u.ConfigurationError(f'Unable to use connection "{conn_name}" for source "{self.name}". Connection "{conn_name}" must be a ConnectionProperties object')
259
+
260
+ if attach_uri is None:
261
+ raise u.ConfigurationError(f'Loading to duckdb is not supported for source "{self.name}" since its connection "{conn_name}" uses an unsupported dialect')
258
262
 
259
263
  result = u.run_duckdb_stmt(self.logger, local_conn, f"FROM (SHOW DATABASES) WHERE database_name = 'db_{conn_name}'").fetchone()
260
264
  if result is None:
@@ -351,7 +355,7 @@ class QueryModel(DataModel):
351
355
  def _get_compile_sql_model_args_from_ctx_args(
352
356
  self, ctx: dict[str, Any], ctx_args: ContextArgs
353
357
  ) -> dict[str, Any]:
354
- is_placeholder = lambda placeholder: placeholder in ctx_args.placeholders
358
+ is_placeholder = lambda placeholder: placeholder in ctx_args._placeholders_copy
355
359
  kwargs = {
356
360
  "proj_vars": ctx_args.proj_vars, "env_vars": ctx_args.env_vars, "user": ctx_args.user, "prms": ctx_args.prms,
357
361
  "traits": ctx_args.traits, "ctx": ctx, "is_placeholder": is_placeholder, "set_placeholder": ctx_args.set_placeholder,
@@ -424,6 +428,11 @@ class QueryModel(DataModel):
424
428
  dependent_model_names.add(self.name)
425
429
  for dep_model in self.upstreams.values():
426
430
  dep_model.retrieve_dependent_query_models(dependent_model_names)
431
+
432
+ def _log_sql_to_run(self, sql: str, placeholders: dict[str, Any]) -> None:
433
+ log_msg = f"SQL to run for model '{self.name}':\n{sql}"
434
+ log_msg += f"\n\n(with placeholders: {placeholders})"
435
+ self.logger.info(log_msg)
427
436
 
428
437
 
429
438
  @dataclass
@@ -460,6 +469,7 @@ class DbviewModel(QueryModel):
460
469
  return "{{ source(\"" + source_name + "\") }}"
461
470
 
462
471
  kwargs["source"] = source
472
+ kwargs["ref"] = source
463
473
  return kwargs
464
474
 
465
475
  def _get_duckdb_query(self, read_dialect: str, query: str) -> str:
@@ -476,15 +486,15 @@ class DbviewModel(QueryModel):
476
486
  connection_props = self.conn_set.get_connection(connection_name)
477
487
 
478
488
  if self.model_config.translate_to_duckdb and isinstance(connection_props, ConnectionProperties):
479
- macros = {
480
- "source": lambda source_name: "venv." + source_name
489
+ macros = {
490
+ "source": lambda source_name: "venv." + source_name
481
491
  }
482
492
  compiled_query2 = self._get_compiled_sql_query_str(compiled_query_str, macros)
483
493
  compiled_query_str = self._get_duckdb_query(connection_props.dialect, compiled_query2)
484
494
  is_duckdb = True
485
495
  else:
486
- macros = {
487
- "source": lambda source_name: self.sources[source_name].get_table()
496
+ macros = {
497
+ "source": lambda source_name: self.sources[source_name].get_table()
488
498
  }
489
499
  compiled_query_str = self._get_compiled_sql_query_str(compiled_query_str, macros)
490
500
  is_duckdb = False
@@ -518,7 +528,7 @@ class DbviewModel(QueryModel):
518
528
  if is_duckdb:
519
529
  local_conn = conn.cursor()
520
530
  try:
521
- self.logger.info(f"Running duckdb query: {query}")
531
+ self.logger.info(f"Running dbview '{self.name}' on duckdb")
522
532
  return local_conn.sql(query, params=placeholders).pl()
523
533
  except duckdb.CatalogException as e:
524
534
  raise InvalidInputError(61, f'Model "{self.name}" depends on static data models that cannot be found.')
@@ -527,10 +537,12 @@ class DbviewModel(QueryModel):
527
537
  finally:
528
538
  local_conn.close()
529
539
  else:
530
- return self._run_sql_query_on_connection(connection_name, query, placeholders)
540
+ self.logger.info(f"Running dbview '{self.name}' on connection: {connection_name}")
541
+ return self.conn_set.run_sql_query_from_conn_name(query, connection_name, placeholders)
531
542
  except RuntimeError as e:
532
543
  raise FileExecutionError(f'Failed to run dbview sql model "{self.name}"', e)
533
544
 
545
+ self._log_sql_to_run(query, placeholders)
534
546
  result = await asyncio.to_thread(run_sql_query_on_connection, is_duckdb, query, placeholders)
535
547
  self.result = result.lazy()
536
548
 
@@ -580,7 +592,7 @@ class FederateModel(QueryModel):
580
592
  connections = self.conn_set.get_connections_as_dict()
581
593
 
582
594
  def run_external_sql(connection_name: str, sql_query: str) -> pl.DataFrame:
583
- return self._run_sql_query_on_connection(connection_name, sql_query, ctx_args.placeholders)
595
+ return self._run_sql_query_on_connection(connection_name, sql_query, ctx_args._placeholders_copy)
584
596
 
585
597
  conn_args = ConnectionsArgs(ctx_args.project_path, ctx_args.proj_vars, ctx_args.env_vars)
586
598
  build_model_args = BuildModelArgs(conn_args, connections, dependencies, self._ref_for_python, run_external_sql)
@@ -636,10 +648,12 @@ class FederateModel(QueryModel):
636
648
  query = compiled_query.query
637
649
 
638
650
  def create_table(local_conn: duckdb.DuckDBPyConnection):
639
- placeholer_exists = lambda key: re.search(r"\$" + key + r"(?!\w)", query)
640
- existing_placeholders = {key: value for key, value in placeholders.items() if placeholer_exists(key)}
651
+ # DuckDB doesn't support specifying named parameters that are not used in the query, so filtering them out
652
+ placeholder_exists = lambda key: re.search(r"\$" + key + r"(?!\w)", query)
653
+ existing_placeholders = {key: value for key, value in placeholders.items() if placeholder_exists(key)}
641
654
 
642
655
  create_query = self.model_config.get_sql_for_create(self.name, query)
656
+ self._log_sql_to_run(create_query, existing_placeholders)
643
657
  try:
644
658
  return local_conn.execute(create_query, existing_placeholders)
645
659
  except duckdb.CatalogException as e:
@@ -772,7 +786,7 @@ class BuildModel(StaticModel, QueryModel):
772
786
  create_query = self.model_config.get_sql_for_build(self.name, query)
773
787
  local_conn = conn.cursor()
774
788
  try:
775
- return u.run_duckdb_stmt(self.logger, local_conn, create_query)
789
+ return u.run_duckdb_stmt(self.logger, local_conn, create_query, model_name=self.name)
776
790
  except Exception as e:
777
791
  raise FileExecutionError(f'Failed to build static sql model "{self.name}"', e) from e
778
792
  finally:
@@ -920,7 +934,7 @@ class DAG:
920
934
 
921
935
  self._compile_models(context, ctx_args, recurse)
922
936
 
923
- self.placeholders = ctx_args.placeholders
937
+ self.placeholders = ctx_args._placeholders_copy
924
938
  if runquery:
925
939
  await self._run_models()
926
940
 
@@ -1,10 +1,10 @@
1
- from squirrels import DashboardArgs, dashboards as d
1
+ from squirrels import arguments as args, dashboards as d
2
2
  from matplotlib import pyplot as plt, figure as f, axes as a
3
3
 
4
4
 
5
- async def main(sqrl: DashboardArgs) -> d.PngDashboard:
6
- spending_by_month_df = await sqrl.dataset("federate_dataset_example", fixed_parameters={"group_by": "g4"})
7
- spending_by_subcategory_df = await sqrl.dataset("federate_dataset_example", fixed_parameters={"group_by": "g3"})
5
+ async def main(sqrl: args.DashboardArgs) -> d.PngDashboard:
6
+ spending_by_month_df = await sqrl.dataset("federate_dataset_example", fixed_parameters={"group_by": "month"})
7
+ spending_by_subcategory_df = await sqrl.dataset("federate_dataset_example", fixed_parameters={"group_by": "subcat"})
8
8
 
9
9
  # Create a figure with two subplots
10
10
  fig, (ax0, ax1) = plt.subplots(2, 1, figsize=(8, 8), height_ratios=(1, 2))
@@ -14,9 +14,9 @@ depends_on:
14
14
  - name: dataset_example_month
15
15
  dataset: federate_dataset_example
16
16
  fixed_parameters:
17
- - group_by: g4 (Month)
17
+ - group_by: month (Month)
18
18
 
19
19
  - name: dataset_example_subcategory
20
20
  dataset: federate_dataset_example
21
21
  fixed_parameters:
22
- - group_by: g3 (Subcategory)
22
+ - group_by: subcat (Subcategory)
@@ -0,0 +1,17 @@
1
+ {%- macro date_and_amount_filters(use_from_range) -%}
2
+ {%- if use_from_range -%}
3
+
4
+ date >= {{ ctx.start_date_from_range | quote }}
5
+ AND date <= {{ ctx.end_date_from_range | quote }}
6
+ AND amount >= {{ ctx.min_amount_from_range }}
7
+ AND amount <= {{ ctx.max_amount_from_range }}
8
+
9
+ {%- else -%}
10
+
11
+ date >= {{ ctx.start_date | quote }}
12
+ AND date <= {{ ctx.end_date | quote }}
13
+ AND amount >= {{ ctx.min_amount }}
14
+ AND amount <= {{ ctx.max_amount }}
15
+
16
+ {%- endif -%}
17
+ {%- endmacro -%}
@@ -1,8 +1,8 @@
1
- from squirrels import BuildModelArgs
1
+ from squirrels import arguments as args
2
2
  import polars as pl, pandas as pd
3
3
 
4
4
 
5
- def main(sqrl: BuildModelArgs) -> pl.LazyFrame | pl.DataFrame | pd.DataFrame:
5
+ def main(sqrl: args.BuildModelArgs) -> pl.LazyFrame | pl.DataFrame | pd.DataFrame:
6
6
  """
7
7
  Create a build model by joining/processing sources or other build models to form a new
8
8
  Python DataFrame (using polars LazyFrame, polars DataFrame, or pandas DataFrame).
@@ -1,4 +1,4 @@
1
- {# DuckDB dialect #}
1
+ {#- DuckDB dialect -#}
2
2
 
3
3
  SELECT a.id,
4
4
  STRFTIME(a.date, '%Y-%m-%d') AS date,
@@ -1,6 +1,8 @@
1
1
  description: |
2
2
  This is an example of a build model. It adds a new column called "month" to the source table "src_transactions".
3
3
 
4
+ materialization: TABLE # optional - defaults to "VIEW" for SQL models, ignored and always a "TABLE" for Python models
5
+
4
6
  depends_on: # optional for SQL models - the "ref" macro also adds to this set
5
7
  - src_transactions
6
8
  - seed_categories
@@ -1,4 +1,4 @@
1
- {# SQLite dialect (based on connection used) #}
1
+ {#- SQLite dialect (based on connection used) -#}
2
2
 
3
3
  SELECT STRFTIME('%Y-%m', date) AS month
4
4
  , printf('%.2f', SUM(amount)) as total_amount
@@ -0,0 +1,41 @@
1
+ from squirrels import arguments as args
2
+ import polars as pl, pandas as pd
3
+
4
+
5
+ def main(sqrl: args.ModelArgs) -> pl.LazyFrame | pl.DataFrame | pd.DataFrame:
6
+ """
7
+ Create federated models by joining/processing dependent models (sources, seeds, builds, dbviews, other federates, etc.) to
8
+ form a new Python DataFrame (using polars LazyFrame, polars DataFrame, or pandas DataFrame).
9
+ """
10
+ df = sqrl.ref("build_example")
11
+
12
+ df = df.filter(
13
+ (pl.col("date") >= sqrl.ctx["start_date_from_range"]) &
14
+ (pl.col("date") <= sqrl.ctx["end_date_from_range"]) &
15
+ (pl.col("amount") >= sqrl.ctx["min_amount_from_range"]) &
16
+ (pl.col("amount") <= sqrl.ctx["max_amount_from_range"])
17
+ )
18
+
19
+ if sqrl.ctx["has_categories"]:
20
+ categories: list[str] = sqrl.ctx["categories"]
21
+ df = df.filter(pl.col("category_id").is_in(categories))
22
+
23
+ if sqrl.ctx["has_subcategories"]:
24
+ subcategories: list[str] = sqrl.ctx["subcategories"]
25
+ df = df.filter(pl.col("subcategory_id").is_in(subcategories))
26
+
27
+ dimension_cols: list[str] = sqrl.ctx["group_by_cols"]
28
+ df = df.group_by(dimension_cols).agg(
29
+ pl.sum("amount").cast(pl.Decimal(precision=15, scale=2)).alias("total_amount")
30
+ )
31
+ df = df.rename(sqrl.ctx["rename_dict"])
32
+
33
+ order_by_cols: list[str] = sqrl.ctx["order_by_cols"]
34
+ df = df.select(*order_by_cols, "total_amount") \
35
+ .sort(order_by_cols, descending=True)
36
+
37
+ if "limit" in sqrl.ctx:
38
+ limit: int = sqrl.ctx["limit"]
39
+ df = df.limit(limit)
40
+
41
+ return df
@@ -0,0 +1,25 @@
1
+ {#- DuckDB dialect -#}
2
+
3
+ SELECT {{ ctx.select_dim_cols | join }}
4
+ , CAST(SUM(amount) AS DECIMAL(15, 2)) as total_amount
5
+
6
+ {# ref() can be used on a sources, seeds, builds, dbviews, or other federate models -#}
7
+ FROM {{ ref("build_example") }} AS a
8
+
9
+ WHERE {{ date_and_amount_filters(use_from_range=true) }}
10
+ {%- if ctx.has_categories %}
11
+ AND category_id IN ({{ ctx.categories | quote_and_join }})
12
+ {%- endif %}
13
+ {%- if ctx.has_subcategories %}
14
+ AND subcategory_id IN ({{ ctx.subcategories | quote_and_join }})
15
+ {%- endif %}
16
+
17
+ GROUP BY {{ ctx.group_by_cols | join }}
18
+
19
+ ORDER BY {{ ctx.order_by_cols_desc | join }}
20
+
21
+ {%- if ctx.limit %}
22
+
23
+ LIMIT {{ ctx.limit }}
24
+
25
+ {%- endif %}