squirrels 0.1.1.post1__py3-none-any.whl → 0.2.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of squirrels might be problematic. Click here for more details.

Files changed (74) hide show
  1. squirrels/__init__.py +10 -16
  2. squirrels/_api_server.py +234 -80
  3. squirrels/_authenticator.py +84 -0
  4. squirrels/_command_line.py +60 -72
  5. squirrels/_connection_set.py +96 -0
  6. squirrels/_constants.py +114 -33
  7. squirrels/_environcfg.py +77 -0
  8. squirrels/_initializer.py +126 -67
  9. squirrels/_manifest.py +195 -168
  10. squirrels/_models.py +495 -0
  11. squirrels/_package_loader.py +26 -0
  12. squirrels/_parameter_configs.py +401 -0
  13. squirrels/_parameter_sets.py +188 -0
  14. squirrels/_py_module.py +60 -0
  15. squirrels/_timer.py +36 -0
  16. squirrels/_utils.py +81 -49
  17. squirrels/_version.py +2 -2
  18. squirrels/arguments/init_time_args.py +32 -0
  19. squirrels/arguments/run_time_args.py +82 -0
  20. squirrels/data_sources.py +380 -155
  21. squirrels/dateutils.py +86 -57
  22. squirrels/package_data/base_project/Dockerfile +15 -0
  23. squirrels/package_data/base_project/connections.yml +7 -0
  24. squirrels/package_data/base_project/database/{sample_database.db → expenses.db} +0 -0
  25. squirrels/package_data/base_project/environcfg.yml +29 -0
  26. squirrels/package_data/base_project/ignores/.dockerignore +8 -0
  27. squirrels/package_data/base_project/ignores/.gitignore +7 -0
  28. squirrels/package_data/base_project/models/dbviews/database_view1.py +36 -0
  29. squirrels/package_data/base_project/models/dbviews/database_view1.sql +15 -0
  30. squirrels/package_data/base_project/models/federates/dataset_example.py +20 -0
  31. squirrels/package_data/base_project/models/federates/dataset_example.sql +3 -0
  32. squirrels/package_data/base_project/parameters.yml +109 -0
  33. squirrels/package_data/base_project/pyconfigs/auth.py +47 -0
  34. squirrels/package_data/base_project/pyconfigs/connections.py +28 -0
  35. squirrels/package_data/base_project/pyconfigs/context.py +45 -0
  36. squirrels/package_data/base_project/pyconfigs/parameters.py +55 -0
  37. squirrels/package_data/base_project/seeds/mocks/category.csv +3 -0
  38. squirrels/package_data/base_project/seeds/mocks/max_filter.csv +2 -0
  39. squirrels/package_data/base_project/seeds/mocks/subcategory.csv +6 -0
  40. squirrels/package_data/base_project/squirrels.yml.j2 +57 -0
  41. squirrels/package_data/base_project/tmp/.gitignore +2 -0
  42. squirrels/package_data/static/script.js +159 -63
  43. squirrels/package_data/static/style.css +79 -15
  44. squirrels/package_data/static/widgets.js +133 -0
  45. squirrels/package_data/templates/index.html +65 -23
  46. squirrels/package_data/templates/index2.html +22 -0
  47. squirrels/parameter_options.py +216 -119
  48. squirrels/parameters.py +407 -478
  49. squirrels/user_base.py +58 -0
  50. squirrels-0.2.0.dev0.dist-info/METADATA +126 -0
  51. squirrels-0.2.0.dev0.dist-info/RECORD +56 -0
  52. {squirrels-0.1.1.post1.dist-info → squirrels-0.2.0.dev0.dist-info}/WHEEL +1 -2
  53. squirrels-0.2.0.dev0.dist-info/entry_points.txt +3 -0
  54. squirrels/_credentials_manager.py +0 -87
  55. squirrels/_module_loader.py +0 -37
  56. squirrels/_parameter_set.py +0 -151
  57. squirrels/_renderer.py +0 -286
  58. squirrels/_timed_imports.py +0 -37
  59. squirrels/connection_set.py +0 -126
  60. squirrels/package_data/base_project/.gitignore +0 -4
  61. squirrels/package_data/base_project/connections.py +0 -20
  62. squirrels/package_data/base_project/datasets/sample_dataset/context.py +0 -22
  63. squirrels/package_data/base_project/datasets/sample_dataset/database_view1.py +0 -29
  64. squirrels/package_data/base_project/datasets/sample_dataset/database_view1.sql.j2 +0 -12
  65. squirrels/package_data/base_project/datasets/sample_dataset/final_view.py +0 -11
  66. squirrels/package_data/base_project/datasets/sample_dataset/final_view.sql.j2 +0 -3
  67. squirrels/package_data/base_project/datasets/sample_dataset/parameters.py +0 -47
  68. squirrels/package_data/base_project/datasets/sample_dataset/selections.cfg +0 -9
  69. squirrels/package_data/base_project/squirrels.yaml +0 -22
  70. squirrels-0.1.1.post1.dist-info/METADATA +0 -67
  71. squirrels-0.1.1.post1.dist-info/RECORD +0 -40
  72. squirrels-0.1.1.post1.dist-info/entry_points.txt +0 -2
  73. squirrels-0.1.1.post1.dist-info/top_level.txt +0 -1
  74. {squirrels-0.1.1.post1.dist-info → squirrels-0.2.0.dev0.dist-info}/LICENSE +0 -0
squirrels/_renderer.py DELETED
@@ -1,286 +0,0 @@
1
- from typing import Dict, Tuple, Optional, Union, Callable, Any
2
- from functools import partial
3
- from configparser import ConfigParser
4
- import concurrent.futures, os, json, time
5
-
6
- from squirrels import _constants as c, _manifest as mf, _utils
7
- from squirrels.connection_set import ConnectionSet, sqldf
8
- from squirrels.data_sources import DataSource
9
- from squirrels._parameter_set import ParameterSet
10
- from squirrels._utils import ConfigurationError
11
- from squirrels._timed_imports import pandas as pd, timer
12
-
13
- ContextFunc = Optional[Callable[..., Dict[str, Any]]]
14
- DatabaseViews = Optional[Dict[str, pd.DataFrame]]
15
- Query = Union[Callable[..., pd.DataFrame], str]
16
-
17
-
18
class Renderer:
    """
    Applies parameter selections and renders the queries and dataframes of a single dataset.

    On construction, the datasource-backed parameters of the raw parameter set are converted using
    either the sheets of an Excel workbook or the results of the datasource queries.
    """

    def __init__(self, dataset: str, manifest: mf.Manifest, conn_set: ConnectionSet, raw_param_set: ParameterSet,
                 context_func: Callable[..., Dict[str, Any]], raw_query_by_db_view: Dict[str, Query],
                 raw_final_view_query: Query, excel_file: Optional[pd.ExcelFile] = None):
        """
        Parameters:
            dataset: Name of the dataset being rendered
            manifest: The Manifest object used to look up view arguments and database connections
            conn_set: The ConnectionSet used to run queries
            raw_param_set: The ParameterSet whose datasources still need to be converted
            context_func: Function called with "prms" to produce the context dictionary
            raw_query_by_db_view: Raw query (SQL template string or python function) per database view
            raw_final_view_query: Raw query for the final view, or the name of a database view
            excel_file: Optional Excel workbook holding one sheet per datasource parameter
        """
        self.dataset = dataset
        self.manifest = manifest
        self.conn_set = conn_set
        self.context_func = context_func
        self.raw_query_by_db_view = raw_query_by_db_view
        self.raw_final_view_query = raw_final_view_query

        start = time.time()
        self.param_set: ParameterSet = self._convert_param_set_datasources(raw_param_set, excel_file)
        timer.add_activity_time(f"convert datasources - dataset {dataset}", start)

    def _convert_param_set_datasources(self, param_set: ParameterSet, excel_file: Optional[pd.ExcelFile] = None) -> ParameterSet:
        """
        Builds one dataframe per datasource and hands them to the parameter set for conversion.

        When an Excel workbook is given, every sheet is read and each datasource key must match a
        sheet name. Otherwise each datasource query is run concurrently on its connection.

        Raises:
            ConfigurationError: If the workbook has no sheet for one of the datasource keys
        """
        datasources = param_set.get_datasources()
        if excel_file is not None:
            # sheet_name=None loads every sheet into a dictionary keyed by sheet name
            df_dict = pd.read_excel(excel_file, sheet_name=None)
            for key in datasources:
                if key not in df_dict:
                    # The message must be an f-string, otherwise "{key}" is shown literally
                    raise ConfigurationError(f'No sheet found for parameter "{key}" in the Excel workbook')
        else:
            def get_dataframe_from_query(item: Tuple[str, DataSource]) -> Tuple[str, pd.DataFrame]:
                key, datasource = item
                df = self.conn_set.get_dataframe_from_query(datasource.connection_name, datasource.get_query())
                return key, df

            with concurrent.futures.ThreadPoolExecutor() as executor:
                df_dict = dict(executor.map(get_dataframe_from_query, datasources.items()))

        param_set.convert_datasource_params(df_dict)
        return param_set

    def apply_selections(self, selections: Dict[str, str], updates_only: bool = False) -> ParameterSet:
        """
        Applies the selected values to the parameters, in the order the parameters are defined.

        Parameters:
            selections: Dictionary of parameter name to the selected value
            updates_only: If True, stop at the first selected parameter and return only the
                parameters affected by that selection

        Returns:
            The resulting ParameterSet
        """
        start = time.time()
        parameter_set = self.param_set
        parameters_dict = parameter_set.get_parameters_as_ordered_dict()

        # iterating through parameters dict instead of query_params since order matters for cascading parameters
        for param_name in parameters_dict:
            if param_name in selections:
                value = selections[param_name]
                parameter = parameter_set.get_parameter(param_name).with_selection(value)
                updates = parameter.get_all_dependent_params()
                if updates_only:
                    parameter_set = updates
                    break
                parameter_set = parameter_set.merge(updates)
        timer.add_activity_time(f"apply selections - dataset {self.dataset}", start)

        return parameter_set

    def _render_context(self, context_func: ContextFunc, param_set: ParameterSet) -> Dict[str, Any]:
        """
        Calls the context function with the parameters, or returns an empty dictionary if there is none.

        Raises:
            ConfigurationError: If the context function raises any exception
        """
        try:
            return context_func(prms=param_set.get_parameters_as_ordered_dict()) if context_func is not None else {}
        except Exception as e:
            raise ConfigurationError(f'Error in the {c.CONTEXT_FILE} function for dataset "{self.dataset}"') from e

    def _get_args(self, param_set: ParameterSet, context: Dict[str, Any], db_view: Optional[str] = None) -> Dict:
        """
        Builds the "prms" / "ctx" / "args" dictionary passed to a query template or python function.

        The view arguments come from the manifest, for the given database view when one is named
        and for the dataset otherwise.
        """
        if db_view is not None:
            args = self.manifest.get_view_args(self.dataset, db_view)
        else:
            args = self.manifest.get_view_args(self.dataset)
        return {
            'prms': param_set.get_parameters_as_ordered_dict(),
            'ctx': context,
            'args': args
        }

    def _render_query_from_raw(self, raw_query: Query, args: Dict) -> Query:
        """
        Renders a raw query: strings are rendered as Jinja templates, callables get the args bound.
        """
        if isinstance(raw_query, str):
            template = _utils.j2_env.from_string(raw_query)
            return template.render(args)
        else:
            return partial(raw_query, **args)

    def _render_dataframe_from_sql(self, db_view_name: str, sql_str: str,
                                   database_views: DatabaseViews = None) -> pd.DataFrame:
        """
        Runs a SQL query and returns the result as a dataframe.

        When database view dataframes are given, the query runs against them through sqldf.
        Otherwise it runs on the database connection configured for the database view.
        """
        if database_views is not None:
            return sqldf(sql_str, database_views)
        else:
            conn_name = self.manifest.get_database_view_db_connection(self.dataset, db_view_name)
            return self.conn_set.get_dataframe_from_query(conn_name, sql_str)

    def _render_dataframe_from_py_func(self, db_view_name: str, py_func: Callable[[Any], pd.DataFrame],
                                       database_views: DatabaseViews = None) -> pd.DataFrame:
        """
        Calls a python view function and returns its dataframe.

        A final view function receives the database view dataframes. A database view function
        receives the connection pool configured for the view plus the whole connection set.

        Raises:
            ConfigurationError: If the python function raises any exception
        """
        if database_views is not None:
            try:
                return py_func(database_views=database_views)
            except Exception as e:
                raise ConfigurationError(f'Error in the final view python function for dataset "{self.dataset}"') from e
        else:
            conn_name = self.manifest.get_database_view_db_connection(self.dataset, db_view_name)
            connection_pool = self.conn_set.get_connection_pool(conn_name)
            try:
                return py_func(connection_pool=connection_pool, connection_set=self.conn_set)
            except Exception as e:
                raise ConfigurationError(f'Error in the python function for database view "{db_view_name}" in dataset "{self.dataset}"') from e

    def _render_db_view_dataframes(self, query_by_db_view: Dict[str, Query]) -> Dict[str, pd.DataFrame]:
        """
        Runs the rendered query of every database view concurrently.

        Returns:
            Dictionary of database view name to its dataframe
        """
        def run_single_query(item: Tuple[str, Query]) -> Tuple[str, pd.DataFrame]:
            view_name, query = item
            if isinstance(query, str):
                return view_name, self._render_dataframe_from_sql(view_name, query)
            else:
                return view_name, self._render_dataframe_from_py_func(view_name, query)

        with concurrent.futures.ThreadPoolExecutor() as executor:
            df_by_view_name = executor.map(run_single_query, query_by_db_view.items())

        return dict(df_by_view_name)

    def _render_final_view_dataframe(self, df_by_db_views: Dict[str, pd.DataFrame],
                                     final_view_query: Optional[Query]) -> pd.DataFrame:
        """
        Produces the final view dataframe from the database view dataframes.

        If the final view query is the name of a database view, that view's dataframe is returned
        as is. Otherwise the SQL string or python function is run against the database views.
        """
        if final_view_query in df_by_db_views:
            return df_by_db_views[final_view_query]
        elif isinstance(final_view_query, str):
            return self._render_dataframe_from_sql("final_view", final_view_query, df_by_db_views)
        else:
            return self._render_dataframe_from_py_func("final_view", final_view_query, df_by_db_views)

    def load_results(self, selections: Dict[str, str], run_query: bool = True) \
            -> Tuple[ParameterSet, Dict[str, Query], Query, Dict[str, pd.DataFrame], Optional[pd.DataFrame]]:
        """
        Applies the selections, renders all queries, and optionally runs them.

        Parameters:
            selections: Dictionary of parameter name to the selected value
            run_query: If False, queries are rendered but not executed

        Returns:
            A tuple of the parameter set, the rendered query per database view, the rendered final
            view query, the dataframe per database view (empty if run_query is False), and the
            final view dataframe (None if run_query is False)
        """
        # apply selections and render context
        param_set = self.apply_selections(selections)
        start = time.time()
        context = self._render_context(self.context_func, param_set)
        timer.add_activity_time(f"render context - dataset {self.dataset}", start)

        # render database view queries
        start = time.time()
        query_by_db_view = {}
        for db_view, raw_query in self.raw_query_by_db_view.items():
            args = self._get_args(param_set, context, db_view)
            query_by_db_view[db_view] = self._render_query_from_raw(raw_query, args)
        timer.add_activity_time(f"render database view queries - dataset {self.dataset}", start)

        # render final view query
        start = time.time()
        args = self._get_args(param_set, context)
        final_view_query = self._render_query_from_raw(self.raw_final_view_query, args)
        timer.add_activity_time(f"render final view query - dataset {self.dataset}", start)

        # render all dataframes if "run_query" is enabled
        df_by_db_views = {}
        final_view_df = None
        if run_query:
            start = time.time()
            df_by_db_views = self._render_db_view_dataframes(query_by_db_view)
            timer.add_activity_time(f"execute dataview view queries - dataset {self.dataset}", start)

            start = time.time()
            final_view_df = self._render_final_view_dataframe(df_by_db_views, final_view_query)
            timer.add_activity_time(f"execute final view query - dataset {self.dataset}", start)

        return param_set, query_by_db_view, final_view_query, df_by_db_views, final_view_df
176
-
177
-
178
def default_context_func(*args, **kwargs):
    """
    Fallback context function that ignores every argument and yields an empty context.

    Returns:
        A new empty dictionary
    """
    return dict()
180
-
181
-
182
class RendererIOWrapper:
    """
    Loads the files of a dataset from disk, builds a Renderer from them, and writes the rendered
    outputs (parameters, SQL queries, and query results) to the dataset's output folder.
    """

    def __init__(self, dataset: str, manifest: mf.Manifest, conn_set: ConnectionSet, excel_file_name: Optional[str] = None):
        """
        Parameters:
            dataset: Name of the dataset
            manifest: The Manifest object used to locate the dataset folder, views, and arguments
            conn_set: The ConnectionSet passed on to the Renderer
            excel_file_name: Optional name of an Excel workbook inside the dataset folder

        Raises:
            ConfigurationError: If the main function of the parameters file raises any exception
        """
        dataset_folder = manifest.get_dataset_folder(dataset)
        parameters_path = _utils.join_paths(dataset_folder, c.PARAMETERS_FILE)
        args = manifest.get_dataset_args(dataset)
        parameters_module = _utils.import_file_as_module(parameters_path)
        try:
            parameter_set = ParameterSet(parameters_module.main(args=args))
        except Exception as e:
            raise ConfigurationError(f'Error in the {c.PARAMETERS_FILE} function for dataset "{dataset}"') from e

        # The context file is optional; without it the context is an empty dictionary
        context_path = _utils.join_paths(dataset_folder, c.CONTEXT_FILE)
        try:
            context_module = _utils.import_file_as_module(context_path)
            context_func = partial(context_module.main, args=args)
        except FileNotFoundError:
            context_func = default_context_func

        excel_file = None
        if excel_file_name is not None:
            excel_file_path = _utils.join_paths(dataset_folder, excel_file_name)
            excel_file = pd.ExcelFile(excel_file_path)

        db_views = manifest.get_all_database_view_names(dataset)
        raw_query_by_db_view = {}
        for db_view in db_views:
            db_view_template_path = str(manifest.get_database_view_file(dataset, db_view))
            raw_query_by_db_view[db_view] = self._get_raw_query(db_view_template_path)

        # The final view may simply name one of the database views instead of pointing to a file
        final_view_path = str(manifest.get_dataset_final_view_file(dataset))
        if final_view_path in db_views:
            raw_final_view_query = final_view_path
        else:
            raw_final_view_query = self._get_raw_query(final_view_path)

        self.dataset_folder = dataset_folder
        self.output_folder = _utils.join_paths(c.OUTPUTS_FOLDER, dataset)
        self.renderer = Renderer(dataset, manifest, conn_set, parameter_set, context_func,
                                 raw_query_by_db_view, raw_final_view_query, excel_file)

    def _get_raw_query(self, template_path: str) -> Query:
        """
        Loads a raw query from a file.

        Returns:
            The "main" function of the module for a ".py" file, otherwise the file contents as a string
        """
        if template_path.endswith(".py"):
            return _utils.import_file_as_module(template_path).main
        else:
            with open(template_path, 'r') as f:
                sql_template = f.read()
            return sql_template

    def _get_selections(self, selection_cfg_file: Optional[str]) -> Dict[str, str]:
        """
        Reads the parameter selections from a config file in the dataset folder.

        Returns:
            The items of the parameters section, or an empty dictionary if no file name is given
            or the file has no such section
        """
        if selection_cfg_file is not None:
            selection_cfg_path = _utils.join_paths(self.dataset_folder, selection_cfg_file)
            config = ConfigParser()
            config.read(selection_cfg_path)
            if config.has_section(c.PARAMETERS_SECTION):
                config_section = config[c.PARAMETERS_SECTION]
                return dict(config_section.items())
        return {}

    def _write_sql_file(self, view_name: str, query: Any):
        """
        Writes a rendered query to "<view_name>.sql" in the output folder. Non-string queries
        (python functions) are skipped.
        """
        if isinstance(query, str):
            db_view_sql_output_path = _utils.join_paths(self.output_folder, view_name+'.sql')
            with open(db_view_sql_output_path, 'w') as f:
                f.write(query)

    def write_outputs(self, selection_cfg_file: Optional[str], run_query: bool) -> None:
        """
        Renders the dataset with the selections from the config file and writes all outputs.

        Parameters:
            selection_cfg_file: Optional name of the selections config file in the dataset folder
            run_query: If True, the queries are also executed and their results written as CSV/JSON
        """
        # create output folder if it doesn't exist (exist_ok avoids a check-then-create race)
        os.makedirs(self.output_folder, exist_ok=True)

        # clear everything in output folder
        files = os.listdir(self.output_folder)
        for file in files:
            file_path = _utils.join_paths(self.output_folder, file)
            os.remove(file_path)

        # apply selections and render outputs
        selections = self._get_selections(selection_cfg_file)
        result = self.renderer.load_results(selections, run_query)
        param_set, query_by_db_view, final_view_query, df_by_db_views, final_view_df = result

        # write the parameters response
        param_set_dict = param_set.to_json_dict()
        parameter_json_output_path = _utils.join_paths(self.output_folder, c.PARAMETERS_OUTPUT)
        with open(parameter_json_output_path, 'w') as f:
            json.dump(param_set_dict, f, indent=4)

        # write the rendered sql queries for database views
        for db_view, query in query_by_db_view.items():
            self._write_sql_file(db_view, query)

        # write the rendered sql query for final view
        if final_view_query not in query_by_db_view:
            self._write_sql_file(c.FINAL_VIEW_OUT_STEM, final_view_query)

        # Run the sql queries and write output
        if run_query:
            for db_view, df in df_by_db_views.items():
                csv_file = _utils.join_paths(self.output_folder, db_view+'.csv')
                df.to_csv(csv_file, index=False)

            final_csv_path = _utils.join_paths(self.output_folder, c.FINAL_VIEW_OUT_STEM+'.csv')
            final_view_df.to_csv(final_csv_path, index=False)

            final_json_path = _utils.join_paths(self.output_folder, c.FINAL_VIEW_OUT_STEM+'.json')
            final_view_df.to_json(final_json_path, orient='table', index=False, indent=4)
@@ -1,37 +0,0 @@
1
- from typing import Dict, List
2
- import time
3
-
4
-
5
class Timer:
    """
    Collects how long named activities took, in milliseconds. Nothing is recorded or printed
    unless the timer is verbose.
    """

    def __init__(self, verbose: bool = False):
        self.verbose = verbose
        # Activity name -> list of every recorded duration (ms) for that activity
        self.times: Dict[str, List[float]] = {}

    def add_activity_time(self, activity: str, start: float):
        """
        Records and prints the time elapsed since "start" (a time.time() timestamp) for the activity.
        """
        if not self.verbose:
            return
        elapsed_ms = (time.time() - start) * 10**3
        self.times.setdefault(activity, []).append(elapsed_ms)
        print(f'Time taken for "{activity}": {elapsed_ms}ms')

    def report_times(self):
        """
        Prints the total and average recorded time of every activity.
        """
        if not self.verbose:
            return
        for activity, durations in self.times.items():
            total_time = sum(durations)
            avg_time = total_time / len(durations)
            print()
            print(f'Time statistics for "{activity}":')
            print(f' Total time: {total_time}ms')
            print(f' Average time: {avg_time}ms')
26
-
27
- timer = Timer()
28
-
29
-
30
- start = time.time()
31
- import pandas
32
- from pandas.api import types as pd_types
33
- timer.add_activity_time("import pandas", start)
34
-
35
- start = time.time()
36
- import jinja2
37
- timer.add_activity_time("import jinja", start)
@@ -1,126 +0,0 @@
1
- from typing import Dict, Union
2
- from importlib.machinery import SourceFileLoader
3
- from sqlalchemy import Engine, Pool
4
- import sqlite3
5
-
6
- from squirrels import _constants as c, _manifest as mf
7
- from squirrels._timed_imports import pandas as pd
8
- from squirrels._utils import ConfigurationError
9
-
10
- ConnectionPool = Union[Engine, Pool]
11
-
12
-
13
class ConnectionSet:
    def __init__(self, conn_pools: Dict[str, ConnectionPool]) -> None:
        """
        Constructor for ConnectionSet, a wrapper class around a collection of sqlalchemy Pools or Engines

        Parameters:
            conn_pools: A dictionary of connection name to the corresponding Pool or Engine from sqlalchemy
        """
        self._conn_pools = conn_pools

    def get_connection_pool(self, conn_name: str = "default") -> ConnectionPool:
        """
        Gets the sqlalchemy Pool or Engine registered under a database connection name

        Parameters:
            conn_name: Name of Pool or Engine. If not provided, defaults to "default"

        Returns:
            A sqlalchemy Pool or Engine

        Raises:
            ConfigurationError: If no Pool or Engine is registered under the connection name
        """
        try:
            return self._conn_pools[conn_name]
        except KeyError as e:
            raise ConfigurationError(f'Connection name "{conn_name}" was not configured') from e

    def __getitem__(self, conn_name: str) -> ConnectionPool:
        """
        Same as get_connection_pool
        """
        return self.get_connection_pool(conn_name)

    def get_dataframe_from_query(self, conn_name: str, query: str) -> pd.DataFrame:
        """
        Runs a SQL query on a database connection name, and returns the results as a pandas DataFrame

        Parameters:
            conn_name: Name of Pool or Engine
            query: The SQL query to run

        Returns:
            A pandas DataFrame whose columns are named after the cursor description

        Raises:
            TypeError: If the object registered under the connection name is neither a Pool nor an Engine
        """
        pool_or_engine = self.get_connection_pool(conn_name)
        if isinstance(pool_or_engine, Pool):
            connection = pool_or_engine.connect()
        elif isinstance(pool_or_engine, Engine):
            connection = pool_or_engine.raw_connection()
        else:
            raise TypeError(f'Type for connection name "{conn_name}" not supported')

        # The connection is always closed, even if the query fails
        try:
            cursor = connection.cursor()
            cursor.execute(query)
            rows = cursor.fetchall()
            column_names = [column[0] for column in cursor.description]
            result = pd.DataFrame(data=rows, columns=column_names)
        finally:
            connection.close()

        return result

    def _dispose(self) -> None:
        """
        Disposes of all the connection pools in this ConnectionSet
        """
        for conn_pool in self._conn_pools.values():
            conn_pool.dispose()
79
-
80
-
81
def _from_file(manifest: mf.Manifest) -> ConnectionSet:
    """
    Takes the DB Connections from both the squirrels.yaml and connections.py files and merges them
    into a single ConnectionSet. On a name clash, the connection from connections.py wins.

    Parameters:
        manifest: The object of Manifest class, the interface for the squirrels.yaml file

    Returns:
        A ConnectionSet with the DB connections from both squirrels.yaml and connections.py

    Raises:
        ConfigurationError: If the main function of the connections file raises any exception
    """
    import importlib.util
    import sys

    connections = manifest.get_db_connections()

    # Loader.load_module() is deprecated; build the module from a spec and execute it instead.
    module_name = c.CONNECTIONS_FILE
    loader = SourceFileLoader(module_name, c.CONNECTIONS_FILE)
    spec = importlib.util.spec_from_loader(module_name, loader)
    module = importlib.util.module_from_spec(spec)
    # Register before executing (as load_module did) so code in the file can find its own module
    sys.modules[module_name] = module
    try:
        loader.exec_module(module)
    except FileNotFoundError:
        # The connections file is optional
        sys.modules.pop(module_name, None)
        module = None
    except BaseException:
        sys.modules.pop(module_name, None)
        raise

    if module is not None:
        proj_vars = manifest.get_proj_vars()
        try:
            conn_from_py_file = module.main(proj_vars)
        except Exception as e:
            raise ConfigurationError(f'Error in the {c.CONNECTIONS_FILE} file') from e
    else:
        conn_from_py_file = {}
    return ConnectionSet({**connections, **conn_from_py_file})
107
-
108
-
109
def sqldf(query: str, df_by_db_views: Dict[str, pd.DataFrame]) -> pd.DataFrame:
    """
    Loads each dataframe as a table of a throwaway in-memory sqlite database and runs a SQL query on it

    Parameters:
        query: The SQL query to run using sqlite
        df_by_db_views: A dictionary of table names to their pandas Dataframe

    Returns:
        The result as a pandas Dataframe from running the query
    """
    connection = sqlite3.connect(":memory:")
    try:
        for table_name, frame in df_by_db_views.items():
            frame.to_sql(table_name, connection, index=False)
        result = pd.read_sql(query, connection)
    finally:
        # Closing the connection discards the in-memory database
        connection.close()
    return result
@@ -1,4 +0,0 @@
1
- __pycache__
2
- /outputs
3
- /modules
4
- /venv
@@ -1,20 +0,0 @@
1
- from typing import Dict, Union, Any
2
- from sqlalchemy import create_engine, Engine, Pool, QueuePool
3
-
4
- from squirrels import get_credential
5
-
6
-
7
- # Note: all connections must be shareable across multiple threads. No writes will occur on them
8
def main(proj: Dict[str, Any], *p_args, **kwargs) -> Dict[str, Union[Engine, Pool]]:
    """
    Creates the database connections of the project.

    Returns:
        A dictionary of connection name to sqlalchemy Engine or Pool. Only the "default"
        connection, an engine on the sample sqlite database, is defined here.
    """
    connections = {}

    ## Example of getting the username and password set with "$ squirrels set-credential [key]"
    # cred = get_credential('my_key') # then use cred.username and cred.password to access the username and password

    # Create a connection pool / engine
    connections['default'] = create_engine('sqlite:///./database/sample_database.db')

    ## Example of using QueuePool instead for a custom db connector:
    # connection_creator = lambda: sqlite3.connect('./database/sample_database.db', check_same_thread=False)
    # connections['default'] = QueuePool(connection_creator)

    return connections
@@ -1,22 +0,0 @@
1
- from typing import Dict, Any
2
- import squirrels as sr
3
-
4
-
5
def main(prms: Dict[str, sr.Parameter], args: Dict[str, Any], *p_args, **kwargs) -> Dict[str, Any]:
    """
    Builds the context dictionary from the selected values of the dataset's parameters.

    Returns:
        A dictionary with the group-by columns, the quoted start and end dates, the quoted and
        joined category and subcategory labels, and the minimum and maximum amounts
    """
    # Look up every parameter first so that a missing one fails before any value is read
    group_by: sr.SingleSelectParameter = prms["group_by"]
    start_date: sr.DateParameter = prms["start_date"]
    end_date: sr.DateParameter = prms["end_date"]
    category: sr.MultiSelectParameter = prms["category"]
    subcategory: sr.MultiSelectParameter = prms["subcategory"]
    min_filter: sr.NumberParameter = prms["min_filter"]
    max_filter: sr.NumberParameter = prms["max_filter"]

    context = {}
    context["group_by_cols"] = group_by.get_selected("columns")
    context["start_date"] = start_date.get_selected_date_quoted()
    context["end_date"] = end_date.get_selected_date_quoted()
    context["categories"] = category.get_selected_labels_quoted_joined()
    context["subcategories"] = subcategory.get_selected_labels_quoted_joined()
    context["min_amount"] = min_filter.get_selected_value()
    context["max_amount"] = max_filter.get_selected_value()
    return context
@@ -1,29 +0,0 @@
1
- from typing import Dict, Any
2
- from sqlalchemy import text
3
- import pandas as pd
4
-
5
- import squirrels as sr
6
-
7
-
8
def main(connection_set: sr.ConnectionSet,
         prms: Dict[str, sr.Parameter], ctx: Dict[str, Any], args: Dict[str, Any],
         *p_args, **kwargs) -> pd.DataFrame:
    """
    Queries the total amount of the transactions, filtered and grouped by the values in the context.

    Parameters:
        connection_set: The ConnectionSet from which the "default" connection is taken
        prms: Dictionary of parameter name to parameter (not used here)
        ctx: The context dictionary providing the SQL fragments used in the query
        args: The view arguments (not used here)

    Returns:
        The query result as a pandas DataFrame
    """
    # NOTE(review): the context values are interpolated into the SQL text as is; this assumes
    # they are already safely quoted by the parameter classes - confirm before reusing this pattern
    query = f"""
        SELECT {ctx["group_by_cols"]}
            , sum(-Amount) as Total_Amount
        FROM transactions
        WHERE Category IN ({ctx["categories"]})
            AND Subcategory IN ({ctx["subcategories"]})
            AND "Date" >= {ctx["start_date"]}
            AND "Date" <= {ctx["end_date"]}
            AND -Amount >= {ctx["min_amount"]}
            AND -Amount <= {ctx["max_amount"]}
        GROUP BY {ctx["group_by_cols"]}
    """

    engine = connection_set.get_connection_pool("default")
    conn = engine.raw_connection()
    try:
        return pd.read_sql(query, conn)
    finally:
        # Close the connection even when the query fails, so it is not leaked
        conn.close()
@@ -1,12 +0,0 @@
1
- -- %USE some_db -- TBA: this line is optional when connecting to the "default" db_connection
2
-
3
- SELECT {{ prms["group_by"].get_selected("columns") }} -- {{ ctx["group_by_cols"] }}
4
- , sum(-Amount) as Total_Amount
5
- FROM transactions
6
- WHERE Category IN ({{ prms["category"].get_selected_labels_quoted_joined() }}) -- ({{ ctx["categories"] }})
7
- AND Subcategory IN ({{ prms["subcategory"].get_selected_labels_quoted_joined() }}) -- ({{ ctx["subcategories"] }})
8
- AND "Date" >= {{ prms["start_date"].get_selected_date_quoted() }} -- {{ ctx["start_date"] }}
9
- AND "Date" <= {{ prms["end_date"].get_selected_date_quoted() }} -- {{ ctx["end_date"] }}
10
- AND -Amount >= {{ prms["min_filter"].get_selected_value() }} -- {{ ctx["min_amount"] }}
11
- AND -Amount <= {{ prms["max_filter"].get_selected_value() }} -- {{ ctx["max_amount"] }}
12
- GROUP BY {{ prms["group_by"].get_selected("columns") }} -- {{ ctx["group_by_cols"] }}
@@ -1,11 +0,0 @@
1
- from typing import Dict, Any
2
- import pandas as pd
3
- import squirrels as sr
4
-
5
-
6
def main(database_views: Dict[str, pd.DataFrame],
         prms: Dict[str, sr.Parameter], ctx: Dict[str, Any], args: Dict[str, Any],
         *p_args, **kwargs) -> pd.DataFrame:
    """
    Returns the "database_view1" dataframe sorted by the group-by columns of the context.

    The group-by columns arrive as one comma-separated string; surrounding whitespace of each
    column name is stripped before sorting.
    """
    view_df = database_views['database_view1']
    sort_columns = list(map(str.strip, ctx["group_by_cols"].split(",")))
    return view_df.sort_values(sort_columns)
@@ -1,3 +0,0 @@
1
- SELECT *
2
- FROM database_view1
3
- ORDER BY {{ prms["group_by"].get_selected("columns") }} -- {{ ctx["group_by_cols"] }}
@@ -1,47 +0,0 @@
1
- from typing import Dict, Sequence, Any
2
- import squirrels as sr
3
-
4
-
5
def main(args: Dict[str, Any], *p_args, **kwargs) -> Sequence[sr.Parameter]:
    """
    Defines the parameters of the sample dataset.

    Returns:
        The group-by, start date, end date, category, subcategory, minimum amount and maximum
        amount parameters, in that order
    """
    parameters = []

    ## Example of creating SingleSelectParameter (similar for MultiSelectParameter)
    group_by_choices = [
        ("g0", "Transaction", "ID,Date"),
        ("g1", "Date", "Date"),
        ("g2", "Category", "Category"),
        ("g3", "Subcategory", "Category,Subcategory"),
    ]
    group_by_options = [
        sr.SelectParameterOption(option_id, label, columns=columns)
        for option_id, label, columns in group_by_choices
    ]
    parameters.append(sr.SingleSelectParameter("group_by", "Group By", group_by_options))

    ## Example of creating DateParameter
    parameters.append(sr.DateParameter("start_date", "Start Date", "2023-01-01"))

    ## Example of creating DateParameter from lookup query/table
    end_date_ds = sr.DateDataSource("SELECT max(Date) as date FROM transactions", "date")
    parameters.append(sr.DataSourceParameter(sr.DateParameter, "end_date", "End Date", end_date_ds))

    ## Example of creating MultiSelectParameter from lookup query/table
    category_ds = sr.SelectionDataSource("SELECT DISTINCT Category_ID, Category FROM categories", "Category_ID", "Category")
    category_filter = sr.DataSourceParameter(sr.MultiSelectParameter, "category", "Category Filter", category_ds)
    parameters.append(category_filter)

    ## Example of creating MultiSelectParameter with parent from lookup query/table
    subcategory_ds = sr.SelectionDataSource("categories", "Subcategory_ID", "Subcategory", parent_id_col="Category_ID")
    parameters.append(
        sr.DataSourceParameter(sr.MultiSelectParameter, "subcategory", "Subcategory Filter", subcategory_ds, parent=category_filter)
    )

    ## Example of creating NumberParameter
    parameters.append(sr.NumberParameter("min_filter", "Amounts Greater Than", 0, 500, 10))

    ## Example of creating NumberParameter from lookup query/table
    query = """
        SELECT 0 as min_value, max(-Amount) as max_value, 10 as increment \
        FROM transactions WHERE Category <> 'Income'
    """
    max_amount_ds = sr.NumberDataSource(query, "min_value", "max_value", "increment", default_value_col="max_value")
    parameters.append(sr.DataSourceParameter(sr.NumberParameter, "max_filter", "Amounts Less Than", max_amount_ds))

    return parameters
@@ -1,9 +0,0 @@
1
- # specify parameters in same order as parameters.py
2
- [parameters]
3
- group_by = g0
4
- start_date = 2023-01-01
5
- end_date = 2023-12-01
6
- category =
7
- subcategory =
8
- min_filter = 0
9
- max_filter = 500
@@ -1,22 +0,0 @@
1
- modules: []
2
-
3
- project_variables:
4
- product: sample
5
- major_version: 1
6
- minor_version: 0
7
-
8
- db_connections:
9
- default:
10
- credential_key: null
11
- url: 'sqlite://${username}:${password}@/./database/sample_database.db'
12
-
13
- datasets:
14
- sample_dataset:
15
- label: Sample Dataset
16
- database_views:
17
- database_view1:
18
- file: database_view1.sql.j2
19
- db_connection: default
20
- final_view: database_view1
21
-
22
- settings: {}