squirrels 0.1.0__py3-none-any.whl → 0.6.0.post0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127) hide show
  1. dateutils/__init__.py +6 -0
  2. dateutils/_enums.py +25 -0
  3. squirrels/dateutils.py → dateutils/_implementation.py +409 -380
  4. dateutils/types.py +6 -0
  5. squirrels/__init__.py +21 -18
  6. squirrels/_api_routes/__init__.py +5 -0
  7. squirrels/_api_routes/auth.py +337 -0
  8. squirrels/_api_routes/base.py +196 -0
  9. squirrels/_api_routes/dashboards.py +156 -0
  10. squirrels/_api_routes/data_management.py +148 -0
  11. squirrels/_api_routes/datasets.py +220 -0
  12. squirrels/_api_routes/project.py +289 -0
  13. squirrels/_api_server.py +552 -134
  14. squirrels/_arguments/__init__.py +0 -0
  15. squirrels/_arguments/init_time_args.py +83 -0
  16. squirrels/_arguments/run_time_args.py +111 -0
  17. squirrels/_auth.py +777 -0
  18. squirrels/_command_line.py +239 -107
  19. squirrels/_compile_prompts.py +147 -0
  20. squirrels/_connection_set.py +94 -0
  21. squirrels/_constants.py +141 -64
  22. squirrels/_dashboards.py +179 -0
  23. squirrels/_data_sources.py +570 -0
  24. squirrels/_dataset_types.py +91 -0
  25. squirrels/_env_vars.py +209 -0
  26. squirrels/_exceptions.py +29 -0
  27. squirrels/_http_error_responses.py +52 -0
  28. squirrels/_initializer.py +319 -110
  29. squirrels/_logging.py +121 -0
  30. squirrels/_manifest.py +357 -187
  31. squirrels/_mcp_server.py +578 -0
  32. squirrels/_model_builder.py +69 -0
  33. squirrels/_model_configs.py +74 -0
  34. squirrels/_model_queries.py +52 -0
  35. squirrels/_models.py +1201 -0
  36. squirrels/_package_data/base_project/.env +7 -0
  37. squirrels/_package_data/base_project/.env.example +44 -0
  38. squirrels/_package_data/base_project/connections.yml +16 -0
  39. squirrels/_package_data/base_project/dashboards/dashboard_example.py +40 -0
  40. squirrels/_package_data/base_project/dashboards/dashboard_example.yml +22 -0
  41. squirrels/_package_data/base_project/docker/.dockerignore +16 -0
  42. squirrels/_package_data/base_project/docker/Dockerfile +16 -0
  43. squirrels/_package_data/base_project/docker/compose.yml +7 -0
  44. squirrels/_package_data/base_project/duckdb_init.sql +10 -0
  45. squirrels/_package_data/base_project/gitignore +13 -0
  46. squirrels/_package_data/base_project/macros/macros_example.sql +17 -0
  47. squirrels/_package_data/base_project/models/builds/build_example.py +26 -0
  48. squirrels/_package_data/base_project/models/builds/build_example.sql +16 -0
  49. squirrels/_package_data/base_project/models/builds/build_example.yml +57 -0
  50. squirrels/_package_data/base_project/models/dbviews/dbview_example.sql +17 -0
  51. squirrels/_package_data/base_project/models/dbviews/dbview_example.yml +32 -0
  52. squirrels/_package_data/base_project/models/federates/federate_example.py +51 -0
  53. squirrels/_package_data/base_project/models/federates/federate_example.sql +21 -0
  54. squirrels/_package_data/base_project/models/federates/federate_example.yml +65 -0
  55. squirrels/_package_data/base_project/models/sources.yml +38 -0
  56. squirrels/_package_data/base_project/parameters.yml +142 -0
  57. squirrels/_package_data/base_project/pyconfigs/connections.py +19 -0
  58. squirrels/_package_data/base_project/pyconfigs/context.py +96 -0
  59. squirrels/_package_data/base_project/pyconfigs/parameters.py +141 -0
  60. squirrels/_package_data/base_project/pyconfigs/user.py +56 -0
  61. squirrels/_package_data/base_project/resources/expenses.db +0 -0
  62. squirrels/_package_data/base_project/resources/public/.gitkeep +0 -0
  63. squirrels/_package_data/base_project/resources/weather.db +0 -0
  64. squirrels/_package_data/base_project/seeds/seed_categories.csv +6 -0
  65. squirrels/_package_data/base_project/seeds/seed_categories.yml +15 -0
  66. squirrels/_package_data/base_project/seeds/seed_subcategories.csv +15 -0
  67. squirrels/_package_data/base_project/seeds/seed_subcategories.yml +21 -0
  68. squirrels/_package_data/base_project/squirrels.yml.j2 +61 -0
  69. squirrels/_package_data/base_project/tmp/.gitignore +2 -0
  70. squirrels/_package_data/templates/login_successful.html +53 -0
  71. squirrels/_package_data/templates/squirrels_studio.html +22 -0
  72. squirrels/_package_loader.py +29 -0
  73. squirrels/_parameter_configs.py +592 -0
  74. squirrels/_parameter_options.py +348 -0
  75. squirrels/_parameter_sets.py +207 -0
  76. squirrels/_parameters.py +1703 -0
  77. squirrels/_project.py +796 -0
  78. squirrels/_py_module.py +122 -0
  79. squirrels/_request_context.py +33 -0
  80. squirrels/_schemas/__init__.py +0 -0
  81. squirrels/_schemas/auth_models.py +83 -0
  82. squirrels/_schemas/query_param_models.py +70 -0
  83. squirrels/_schemas/request_models.py +26 -0
  84. squirrels/_schemas/response_models.py +286 -0
  85. squirrels/_seeds.py +97 -0
  86. squirrels/_sources.py +112 -0
  87. squirrels/_utils.py +540 -149
  88. squirrels/_version.py +1 -3
  89. squirrels/arguments.py +7 -0
  90. squirrels/auth.py +4 -0
  91. squirrels/connections.py +3 -0
  92. squirrels/dashboards.py +3 -0
  93. squirrels/data_sources.py +14 -282
  94. squirrels/parameter_options.py +13 -189
  95. squirrels/parameters.py +14 -801
  96. squirrels/types.py +18 -0
  97. squirrels-0.6.0.post0.dist-info/METADATA +148 -0
  98. squirrels-0.6.0.post0.dist-info/RECORD +101 -0
  99. {squirrels-0.1.0.dist-info → squirrels-0.6.0.post0.dist-info}/WHEEL +1 -2
  100. {squirrels-0.1.0.dist-info → squirrels-0.6.0.post0.dist-info}/entry_points.txt +1 -0
  101. squirrels-0.6.0.post0.dist-info/licenses/LICENSE +201 -0
  102. squirrels/_credentials_manager.py +0 -87
  103. squirrels/_module_loader.py +0 -37
  104. squirrels/_parameter_set.py +0 -151
  105. squirrels/_renderer.py +0 -286
  106. squirrels/_timed_imports.py +0 -37
  107. squirrels/connection_set.py +0 -126
  108. squirrels/package_data/base_project/.gitignore +0 -4
  109. squirrels/package_data/base_project/connections.py +0 -21
  110. squirrels/package_data/base_project/database/sample_database.db +0 -0
  111. squirrels/package_data/base_project/database/seattle_weather.db +0 -0
  112. squirrels/package_data/base_project/datasets/sample_dataset/context.py +0 -8
  113. squirrels/package_data/base_project/datasets/sample_dataset/database_view1.py +0 -23
  114. squirrels/package_data/base_project/datasets/sample_dataset/database_view1.sql.j2 +0 -7
  115. squirrels/package_data/base_project/datasets/sample_dataset/final_view.py +0 -10
  116. squirrels/package_data/base_project/datasets/sample_dataset/final_view.sql.j2 +0 -2
  117. squirrels/package_data/base_project/datasets/sample_dataset/parameters.py +0 -30
  118. squirrels/package_data/base_project/datasets/sample_dataset/selections.cfg +0 -6
  119. squirrels/package_data/base_project/squirrels.yaml +0 -26
  120. squirrels/package_data/static/favicon.ico +0 -0
  121. squirrels/package_data/static/script.js +0 -234
  122. squirrels/package_data/static/style.css +0 -110
  123. squirrels/package_data/templates/index.html +0 -32
  124. squirrels-0.1.0.dist-info/LICENSE +0 -22
  125. squirrels-0.1.0.dist-info/METADATA +0 -67
  126. squirrels-0.1.0.dist-info/RECORD +0 -40
  127. squirrels-0.1.0.dist-info/top_level.txt +0 -1
@@ -1,151 +0,0 @@
1
- from __future__ import annotations
2
- from typing import Sequence, Dict, Any
3
- from collections import OrderedDict
4
-
5
- from squirrels import data_sources as d, parameters as p
6
- from squirrels._timed_imports import pandas as pd
7
-
8
-
9
class ParameterSetBase:
    """
    An insertion-ordered "parameter collection" keyed by parameter name.

    This is the base class for ParameterSet. It keeps no separate collection for DataSourceParameter's
    and starts out empty (parameters are added one at a time with add_parameter).
    """

    def __init__(self) -> None:
        """
        Constructor for ParameterSetBase. Creates an empty "parameter collection".
        """
        self._parameters_dict: OrderedDict[str, p.Parameter] = OrderedDict()

    def add_parameter(self, parameter: p.Parameter) -> None:
        """
        Stores a parameter in the "parameter collection" under its "name" attribute.
        A parameter already stored under the same name is replaced.

        Parameters:
            parameter: The parameter to add
        """
        self._parameters_dict[parameter.name] = parameter

    def get_parameter(self, param_name: str) -> p.Parameter:
        """
        Looks up a Parameter object by name

        Parameters:
            param_name: The parameter name

        Returns:
            The Parameter object stored under the parameter name

        Raises:
            KeyError: If no parameter is stored under the parameter name
        """
        if param_name not in self._parameters_dict:
            raise KeyError(f'No such parameter exists called "{param_name}"')
        return self._parameters_dict[param_name]

    def __getitem__(self, param_name: str) -> p.Parameter:
        """
        Same as get_parameter, so that the set can be indexed by parameter name
        """
        return self.get_parameter(param_name)

    def get_parameters_as_ordered_dict(self) -> OrderedDict[str, p.Parameter]:
        """
        Returns a shallow copy of the inner dictionary of the "parameter collection"

        Returns:
            A new ordered dictionary where keys are the assigned names and values are the Parameter objects
        """
        return self._parameters_dict.copy()

    def merge(self, other: ParameterSetBase) -> ParameterSetBase:
        """
        Combines the "parameter collection" of this and another ParameterSetBase into a new object.
        When a name exists in both, the parameter from "other" is used. Neither input is modified.

        Parameters:
            other: The other ParameterSetBase

        Returns:
            A new ParameterSetBase holding the combined parameters
        """
        combined = self._parameters_dict.copy()
        combined.update(other._parameters_dict)

        merged_set = ParameterSetBase()
        merged_set._parameters_dict = combined
        return merged_set

    def to_json_dict(self, debug: bool = False) -> Dict[str, Any]:
        """
        Converts this object, and all parameters contained, into a JSON dictionary

        Parameters:
            debug: Set to True to also include the parameters whose "is_hidden" attribute is true

        Returns:
            A dictionary with a "response_version" of 0 and the list of "parameters" in JSON dictionary form
        """
        shown = [
            param.to_json_dict()
            for param in self._parameters_dict.values()
            if not param.is_hidden or debug
        ]
        return {
            "response_version": 0,
            "parameters": shown
        }
88
-
89
-
90
class ParameterSet(ParameterSetBase):
    """
    A ParameterSetBase that is pre-populated from a sequence of parameters and additionally tracks
    the DataSourceParameter's among them in a separate ordered dictionary.
    """

    def __init__(self, parameters: Sequence[p.Parameter]):
        """
        Constructor for ParameterSet, a wrapper class for a sequence of parameters,
        and stores the DataSourceParameters as a separate field as well

        Parameters:
            parameters: A sequence of parameters
        """
        super().__init__()
        self._data_source_params: OrderedDict[str, p.DataSourceParameter] = OrderedDict()
        for param in parameters:
            self._parameters_dict[param.name] = param
            if isinstance(param, p.DataSourceParameter):
                self._data_source_params[param.name] = param

    def merge(self, other: ParameterSetBase) -> ParameterSet:
        """
        Merges this object with another ParameterSet (by combining the parameters) to create a new ParameterSet.

        The _parameters_dict are merged (with the other ParameterSet taking precedence when a name exist in both dict),
        while the _data_source_params are only taken from this object. This object and the other ParameterSet remain
        unchanged.

        Parameters:
            other: The other parameter set

        Returns:
            A new ParameterSet that contains all the parameters from this and the other parameter set.
        """
        new_param_set_base = super().merge(other)
        new_param_set = ParameterSet(())
        new_param_set._parameters_dict = new_param_set_base._parameters_dict
        # Copy rather than share the dictionary: convert_datasource_params() clears it in place, and
        # sharing would let a call on the merged set empty this object's data source parameters too.
        new_param_set._data_source_params = OrderedDict(self._data_source_params)
        return new_param_set

    def get_datasources(self) -> Dict[str, d.DataSource]:
        """
        Gets all the DataSource objects as values to a dictionary where keys are the DataSource parameter names.

        Returns:
            A dictionary where keys are the names of DataSourceParameter's and values are the "data_source"
            attribute of the corresponding DataSourceParameter.
        """
        return {
            param_name: ds_param.data_source
            for param_name, ds_param in self._data_source_params.items()
        }

    def convert_datasource_params(self, df_dict: Dict[str, pd.DataFrame]) -> None:
        """
        Changes all the DataSourceParameters into other Parameter types. The _data_source_params field gets cleared.

        Parameters:
            df_dict: A dictionary of DataSourceParameter name to the pandas DataFrame of the lookup table data.
                Must contain a key for every DataSourceParameter in this set (a missing key raises KeyError).
        """
        # Done sequentially since parents must be converted first before children
        for key, ds_param in self._data_source_params.items():
            # Re-point the parent at whatever is currently stored under the parent's name in this set,
            # which is the converted object if the parent was a data source parameter handled earlier.
            ds_param.parent = self.get_parameter(ds_param.parent.name) if ds_param.parent is not None else None
            self._parameters_dict[key] = ds_param.convert(df_dict[key])
        self._data_source_params.clear()
squirrels/_renderer.py DELETED
@@ -1,286 +0,0 @@
1
- from typing import Dict, Tuple, Optional, Union, Callable, Any
2
- from functools import partial
3
- from configparser import ConfigParser
4
- import concurrent.futures, os, json, time
5
-
6
- from squirrels import _constants as c, _manifest as mf, _utils
7
- from squirrels.connection_set import ConnectionSet, sqldf
8
- from squirrels.data_sources import DataSource
9
- from squirrels._parameter_set import ParameterSet
10
- from squirrels._utils import ConfigurationError
11
- from squirrels._timed_imports import pandas as pd, timer
12
-
13
- ContextFunc = Optional[Callable[..., Dict[str, Any]]]
14
- DatabaseViews = Optional[Dict[str, pd.DataFrame]]
15
- Query = Union[Callable[..., pd.DataFrame], str]
16
-
17
-
18
class Renderer:
    """
    Renders the parameters, queries, and result dataframes of a single dataset.

    Holds the raw (unrendered) database view queries and final view query. A raw query is either a
    SQL template string or a python function; see _render_query_from_raw for how each is rendered.
    """

    def __init__(self, dataset: str, manifest: mf.Manifest, conn_set: ConnectionSet, raw_param_set: ParameterSet,
                 context_func: Callable[..., Dict[str, Any]], raw_query_by_db_view: Dict[str, Query],
                 raw_final_view_query: Query, excel_file: Optional[pd.ExcelFile] = None):
        """
        Constructor for Renderer. Converts the data source parameters of the parameter set right away.

        Parameters:
            dataset: Name of the dataset
            manifest: The Manifest object used to look up view arguments and connection names
            conn_set: The ConnectionSet used to run queries
            raw_param_set: The parameter set whose data source parameters still need converting (modified in place)
            context_func: Function called with keyword "prms" to produce the context dictionary
            raw_query_by_db_view: Raw query (SQL template string or python function) per database view name
            raw_final_view_query: Raw query for the final view
            excel_file: If provided, lookup data for data source parameters is read from this workbook
                instead of being queried through conn_set
        """
        self.dataset = dataset
        self.manifest = manifest
        self.conn_set = conn_set
        self.context_func = context_func
        self.raw_query_by_db_view = raw_query_by_db_view
        self.raw_final_view_query = raw_final_view_query

        start = time.time()
        self.param_set: ParameterSet = self._convert_param_set_datasources(raw_param_set, excel_file)
        timer.add_activity_time(f"convert datasources - dataset {dataset}", start)

    def _convert_param_set_datasources(self, param_set: ParameterSet, excel_file: Optional[pd.ExcelFile] = None) -> ParameterSet:
        """
        Loads the lookup data for every data source parameter and converts those parameters in place.

        Parameters:
            param_set: The parameter set to convert (the same object is returned)
            excel_file: If provided, every sheet of the workbook is read and each data source parameter
                name must match a sheet name. Otherwise one query per data source is run on a thread pool.

        Returns:
            The same ParameterSet object after conversion

        Raises:
            ConfigurationError: If the workbook has no sheet for some data source parameter
        """
        datasources = param_set.get_datasources()
        if excel_file is not None:
            # sheet_name=None makes pandas return a dictionary of sheet name to DataFrame
            df_dict = pd.read_excel(excel_file, sheet_name=None)
            for key in datasources:
                if key not in df_dict:
                    # f-string is required here so the message names the actual parameter
                    raise ConfigurationError(f'No sheet found for parameter "{key}" in the Excel workbook')
        else:
            def get_dataframe_from_query(item: Tuple[str, DataSource]) -> Tuple[str, pd.DataFrame]:
                key, datasource = item
                df = self.conn_set.get_dataframe_from_query(datasource.connection_name, datasource.get_query())
                return key, df

            with concurrent.futures.ThreadPoolExecutor() as executor:
                df_dict = dict(executor.map(get_dataframe_from_query, datasources.items()))

        param_set.convert_datasource_params(df_dict)
        return param_set

    def apply_selections(self, selections: Dict[str, str], updates_only: bool = False) -> ParameterSet:
        """
        Applies the selected values to the parameters and returns the resulting parameter set.

        Parameters:
            selections: Dictionary of parameter name to the selected value
            updates_only: If True, stop at the first parameter (in parameter order) that has a selection
                and return only that parameter's dependent parameters. If no parameter has a selection,
                the full stored parameter set is returned.

        Returns:
            The parameter set after the selections are applied (self.param_set itself is not reassigned)
        """
        start = time.time()
        parameter_set = self.param_set
        parameters_dict = parameter_set.get_parameters_as_ordered_dict()

        # iterating through parameters dict instead of query_params since order matters for cascading parameters
        for param_name in parameters_dict:
            if param_name in selections:
                value = selections[param_name]
                # look the parameter up in the current (possibly already merged) set, not the initial snapshot
                parameter = parameter_set.get_parameter(param_name).with_selection(value)
                updates = parameter.get_all_dependent_params()
                if updates_only:
                    parameter_set = updates
                    break
                parameter_set = parameter_set.merge(updates)
        timer.add_activity_time(f"apply selections - dataset {self.dataset}", start)

        return parameter_set

    def _render_context(self, context_func: ContextFunc, param_set: ParameterSet) -> Dict[str, Any]:
        """
        Calls the context function with keyword "prms" set to the ordered dictionary of parameters.

        Returns:
            The dictionary returned by the context function, or an empty dictionary if it is None

        Raises:
            ConfigurationError: If the context function raises any exception
        """
        try:
            return context_func(prms=param_set.get_parameters_as_ordered_dict()) if context_func is not None else {}
        except Exception as e:
            raise ConfigurationError(f'Error in the {c.CONTEXT_FILE} function for dataset "{self.dataset}"') from e

    def _get_args(self, param_set: ParameterSet, context: Dict[str, Any], db_view: Optional[str] = None) -> Dict:
        """
        Builds the keyword arguments ("prms", "ctx", "args") passed to a raw query when rendering it.

        Parameters:
            param_set: The parameter set after selections are applied
            context: The rendered context dictionary
            db_view: Name of the database view, or None to get the view arguments without a view name
        """
        if db_view is not None:
            args = self.manifest.get_view_args(self.dataset, db_view)
        else:
            args = self.manifest.get_view_args(self.dataset)
        return {
            'prms': param_set.get_parameters_as_ordered_dict(),
            'ctx': context,
            'args': args
        }

    def _render_query_from_raw(self, raw_query: Query, args: Dict) -> Query:
        """
        Renders a raw query. A string is rendered as a template with "args" as its variables; anything
        else is treated as a function and returned with "args" bound as keyword arguments (not yet called).
        """
        if isinstance(raw_query, str):
            template = _utils.j2_env.from_string(raw_query)
            return template.render(args)
        else:
            return partial(raw_query, **args)

    def _render_dataframe_from_sql(self, db_view_name: str, sql_str: str,
                                   database_views: DatabaseViews = None) -> pd.DataFrame:
        """
        Runs a rendered SQL query. If "database_views" is provided, the query runs against those
        dataframes using sqldf; otherwise it runs on the connection configured for the database view.
        """
        if database_views is not None:
            return sqldf(sql_str, database_views)
        else:
            conn_name = self.manifest.get_database_view_db_connection(self.dataset, db_view_name)
            return self.conn_set.get_dataframe_from_query(conn_name, sql_str)

    def _render_dataframe_from_py_func(self, db_view_name: str, py_func: Callable[..., pd.DataFrame],
                                       database_views: DatabaseViews = None) -> pd.DataFrame:
        """
        Calls a rendered python query function. With "database_views" it is called as a final view
        (keyword "database_views"); otherwise as a database view (keywords "connection_pool" and
        "connection_set").

        Raises:
            ConfigurationError: If the python function raises any exception
        """
        if database_views is not None:
            try:
                return py_func(database_views=database_views)
            except Exception as e:
                raise ConfigurationError(f'Error in the final view python function for dataset "{self.dataset}"') from e
        else:
            conn_name = self.manifest.get_database_view_db_connection(self.dataset, db_view_name)
            connection_pool = self.conn_set.get_connection_pool(conn_name)
            try:
                return py_func(connection_pool=connection_pool, connection_set=self.conn_set)
            except Exception as e:
                raise ConfigurationError(f'Error in the python function for database view "{db_view_name}" in dataset "{self.dataset}"') from e

    def _render_db_view_dataframes(self, query_by_db_view: Dict[str, Query]) -> Dict[str, pd.DataFrame]:
        """
        Runs every rendered database view query on a thread pool.

        Returns:
            A dictionary of database view name to the resulting dataframe
        """
        def run_single_query(item: Tuple[str, Query]) -> Tuple[str, pd.DataFrame]:
            view_name, query = item
            if isinstance(query, str):
                return view_name, self._render_dataframe_from_sql(view_name, query)
            else:
                return view_name, self._render_dataframe_from_py_func(view_name, query)

        with concurrent.futures.ThreadPoolExecutor() as executor:
            df_by_view_name = executor.map(run_single_query, query_by_db_view.items())

        # leaving the "with" block waits for all queries; an exception from a query surfaces here
        return dict(df_by_view_name)

    def _render_final_view_dataframe(self, df_by_db_views: Dict[str, pd.DataFrame],
                                     final_view_query: Optional[Query]) -> pd.DataFrame:
        """
        Produces the final view dataframe. If the final view query equals a database view name, that
        view's dataframe is returned as is; otherwise the SQL string or python function is run against
        the database view dataframes.
        """
        if final_view_query in df_by_db_views:
            return df_by_db_views[final_view_query]
        elif isinstance(final_view_query, str):
            return self._render_dataframe_from_sql("final_view", final_view_query, df_by_db_views)
        else:
            return self._render_dataframe_from_py_func("final_view", final_view_query, df_by_db_views)

    def load_results(self, selections: Dict[str, str], run_query: bool = True) \
            -> Tuple[ParameterSet, Dict[str, Query], Query, Dict[str, pd.DataFrame], Optional[pd.DataFrame]]:
        """
        Applies selections, renders the context and all queries, and optionally runs the queries.

        Parameters:
            selections: Dictionary of parameter name to the selected value
            run_query: If False, queries are rendered but not executed

        Returns:
            A tuple of: the parameter set after selections, the rendered query per database view, the
            rendered final view query, the dataframe per database view (empty if run_query is False),
            and the final view dataframe (None if run_query is False)
        """
        # apply selections and render context
        param_set = self.apply_selections(selections)
        start = time.time()
        context = self._render_context(self.context_func, param_set)
        timer.add_activity_time(f"render context - dataset {self.dataset}", start)

        # render database view queries
        start = time.time()
        query_by_db_view = {}
        for db_view, raw_query in self.raw_query_by_db_view.items():
            args = self._get_args(param_set, context, db_view)
            query_by_db_view[db_view] = self._render_query_from_raw(raw_query, args)
        timer.add_activity_time(f"render database view queries - dataset {self.dataset}", start)

        # render final view query
        start = time.time()
        args = self._get_args(param_set, context)
        final_view_query = self._render_query_from_raw(self.raw_final_view_query, args)
        timer.add_activity_time(f"render final view query - dataset {self.dataset}", start)

        # render all dataframes if "run_query" is enabled
        df_by_db_views = {}
        final_view_df = None
        if run_query:
            start = time.time()
            df_by_db_views = self._render_db_view_dataframes(query_by_db_view)
            timer.add_activity_time(f"execute dataview view queries - dataset {self.dataset}", start)

            start = time.time()
            final_view_df = self._render_final_view_dataframe(df_by_db_views, final_view_query)
            timer.add_activity_time(f"execute final view query - dataset {self.dataset}", start)

        return param_set, query_by_db_view, final_view_query, df_by_db_views, final_view_df
176
-
177
-
178
def default_context_func(*args, **kwargs):
    """
    Fallback context function: accepts and ignores any arguments and returns a new empty dictionary.
    """
    return dict()
180
-
181
-
182
class RendererIOWrapper:
    """
    Builds a Renderer for a dataset from the files in the dataset folder, and writes the rendered
    parameters, queries, and query results to the dataset's output folder.
    """

    def __init__(self, dataset: str, manifest: mf.Manifest, conn_set: ConnectionSet, excel_file_name: Optional[str] = None):
        """
        Constructor for RendererIOWrapper. Loads the parameters file, the optional context file, and
        the raw queries of the dataset, then creates the Renderer.

        Parameters:
            dataset: Name of the dataset
            manifest: The Manifest object used to locate the dataset folder and view files
            conn_set: The ConnectionSet passed on to the Renderer
            excel_file_name: Optional name of an Excel workbook inside the dataset folder, passed on
                to the Renderer as a pandas ExcelFile

        Raises:
            ConfigurationError: If the "main" function of the parameters file raises any exception
        """
        dataset_folder = manifest.get_dataset_folder(dataset)
        parameters_path = _utils.join_paths(dataset_folder, c.PARAMETERS_FILE)
        args = manifest.get_dataset_args(dataset)
        parameters_module = _utils.import_file_as_module(parameters_path)
        try:
            parameter_set = ParameterSet(parameters_module.main(args=args))
        except Exception as e:
            raise ConfigurationError(f'Error in the {c.PARAMETERS_FILE} function for dataset "{dataset}"') from e

        # the context file is optional: fall back to a function that returns an empty context
        context_path = _utils.join_paths(dataset_folder, c.CONTEXT_FILE)
        try:
            context_module = _utils.import_file_as_module(context_path)
            context_func = partial(context_module.main, args=args)
        except FileNotFoundError:
            context_func = default_context_func

        excel_file = None
        if excel_file_name is not None:
            excel_file_path = _utils.join_paths(dataset_folder, excel_file_name)
            excel_file = pd.ExcelFile(excel_file_path)

        db_views = manifest.get_all_database_view_names(dataset)
        raw_query_by_db_view = {}
        for db_view in db_views:
            db_view_template_path = str(manifest.get_database_view_file(dataset, db_view))
            raw_query_by_db_view[db_view] = self._get_raw_query(db_view_template_path)

        # when the configured final view is the name of a database view, keep the name instead of loading a file
        final_view_path = str(manifest.get_dataset_final_view_file(dataset))
        if final_view_path in db_views:
            raw_final_view_query = final_view_path
        else:
            raw_final_view_query = self._get_raw_query(final_view_path)

        self.dataset_folder = dataset_folder
        self.output_folder = _utils.join_paths(c.OUTPUTS_FOLDER, dataset)
        self.renderer = Renderer(dataset, manifest, conn_set, parameter_set, context_func,
                                 raw_query_by_db_view, raw_final_view_query, excel_file)

    def _get_raw_query(self, template_path: str) -> Query:
        """
        Loads a raw query from a file.

        Returns:
            The "main" function of the module if the path ends with ".py", otherwise the file contents as a string
        """
        if template_path.endswith(".py"):
            return _utils.import_file_as_module(template_path).main
        else:
            with open(template_path, 'r') as f:
                sql_template = f.read()
            return sql_template

    def _get_selections(self, selection_cfg_file: Optional[str]) -> Dict[str, str]:
        """
        Reads the parameter selections from a configuration file in the dataset folder.

        Returns:
            The key/value pairs of the parameters section, or an empty dictionary if no file name is
            given or the file has no such section
        """
        if selection_cfg_file is not None:
            selection_cfg_path = _utils.join_paths(self.dataset_folder, selection_cfg_file)
            config = ConfigParser()
            config.read(selection_cfg_path)
            if config.has_section(c.PARAMETERS_SECTION):
                config_section = config[c.PARAMETERS_SECTION]
                return dict(config_section.items())
        return {}

    def _write_sql_file(self, view_name: str, query: Any):
        """
        Writes a rendered query to "<view_name>.sql" in the output folder. Non-string queries
        (python functions) are skipped.
        """
        if isinstance(query, str):
            db_view_sql_output_path = _utils.join_paths(self.output_folder, view_name+'.sql')
            with open(db_view_sql_output_path, 'w') as f:
                f.write(query)

    def write_outputs(self, selection_cfg_file: Optional[str], run_query: bool) -> None:
        """
        Renders the dataset with the selections from the configuration file and writes all outputs.

        Parameters:
            selection_cfg_file: Optional name of the selections configuration file in the dataset folder
            run_query: If True, also run the queries and write their results as csv (and json for the final view)
        """
        # create output folder if it doesn't exist
        os.makedirs(self.output_folder, exist_ok=True)

        # clear everything in output folder (os.remove is used, so entries are expected to be files)
        files = os.listdir(self.output_folder)
        for file in files:
            file_path = _utils.join_paths(self.output_folder, file)
            os.remove(file_path)

        # apply selections and render outputs
        selections = self._get_selections(selection_cfg_file)
        result = self.renderer.load_results(selections, run_query)
        param_set, query_by_db_view, final_view_query, df_by_db_views, final_view_df = result

        # write the parameters response
        param_set_dict = param_set.to_json_dict()
        parameter_json_output_path = _utils.join_paths(self.output_folder, c.PARAMETERS_OUTPUT)
        with open(parameter_json_output_path, 'w') as f:
            json.dump(param_set_dict, f, indent=4)

        # write the rendered sql queries for database views
        for db_view, query in query_by_db_view.items():
            self._write_sql_file(db_view, query)

        # write the rendered sql query for final view (unless it is just the name of a database view)
        if final_view_query not in query_by_db_view:
            self._write_sql_file(c.FINAL_VIEW_OUT_STEM, final_view_query)

        # Run the sql queries and write output
        if run_query:
            for db_view, df in df_by_db_views.items():
                csv_file = _utils.join_paths(self.output_folder, db_view+'.csv')
                df.to_csv(csv_file, index=False)

            final_csv_path = _utils.join_paths(self.output_folder, c.FINAL_VIEW_OUT_STEM+'.csv')
            final_view_df.to_csv(final_csv_path, index=False)

            final_json_path = _utils.join_paths(self.output_folder, c.FINAL_VIEW_OUT_STEM+'.json')
            final_view_df.to_json(final_json_path, orient='table', index=False, indent=4)
@@ -1,37 +0,0 @@
1
- from typing import Dict, List
2
- import time
3
-
4
-
5
class Timer:
    """
    Records how long named activities take (in milliseconds) and prints them.
    Nothing is recorded or printed unless "verbose" is true.
    """

    def __init__(self, verbose: bool = False):
        """
        Parameters:
            verbose: Whether timings are recorded and printed
        """
        self.verbose = verbose
        # activity name -> list of recorded durations in milliseconds
        self.times: Dict[str, List[float]] = {}

    def add_activity_time(self, activity: str, start: float):
        """
        Records and prints the time elapsed since "start" for the activity (only when verbose).

        Parameters:
            activity: Name of the activity
            start: The start time, as returned by time.time()
        """
        if not self.verbose:
            return

        elapsed_ms = (time.time()-start) * 10**3
        self.times.setdefault(activity, []).append(elapsed_ms)
        print(f'Time taken for "{activity}": {elapsed_ms}ms')

    def report_times(self):
        """
        Prints the total and average recorded time of every activity (only when verbose).
        """
        if not self.verbose:
            return

        for activity, durations in self.times.items():
            total_time = sum(durations)
            avg_time = total_time / len(durations)
            print()
            print(f'Time statistics for "{activity}":')
            print(f' Total time: {total_time}ms')
            print(f' Average time: {avg_time}ms')
26
-
27
- timer = Timer()
28
-
29
-
30
- start = time.time()
31
- import pandas
32
- from pandas.api import types as pd_types
33
- timer.add_activity_time("import pandas", start)
34
-
35
- start = time.time()
36
- import jinja2
37
- timer.add_activity_time("import jinja", start)
@@ -1,126 +0,0 @@
1
- from typing import Dict, Union
2
- from importlib.machinery import SourceFileLoader
3
- from sqlalchemy import Engine, Pool
4
- import sqlite3
5
-
6
- from squirrels import _constants as c, _manifest as mf
7
- from squirrels._timed_imports import pandas as pd
8
- from squirrels._utils import ConfigurationError
9
-
10
- ConnectionPool = Union[Engine, Pool]
11
-
12
-
13
class ConnectionSet:
    def __init__(self, conn_pools: Dict[str, ConnectionPool]) -> None:
        """
        Constructor for ConnectionSet, a wrapper class around a collection of Connection Pools or Sqlalchemy Engines

        Parameters:
            conn_pools: A dictionary of connection pool name to the corresponding Pool or Engine from sqlalchemy
        """
        self._conn_pools = conn_pools

    def get_connection_pool(self, conn_name: str = "default") -> ConnectionPool:
        """
        Gets the sqlalchemy Pool or Engine from the database connection name

        Parameters:
            conn_name: Name of Pool or Engine. If not provided, defaults to "default"

        Returns:
            A sqlalchemy Pool or Engine

        Raises:
            ConfigurationError: If no Pool or Engine was configured under the connection name
        """
        try:
            connection_pool = self._conn_pools[conn_name]
        except KeyError as e:
            raise ConfigurationError(f'Connection name "{conn_name}" was not configured') from e
        return connection_pool

    def __getitem__(self, conn_name: str) -> ConnectionPool:
        """
        Same as get_connection_pool
        """
        return self.get_connection_pool(conn_name)

    def get_dataframe_from_query(self, conn_name: str, query: str) -> pd.DataFrame:
        """
        Runs a SQL query on a database connection name, and returns the results as pandas DataFrame

        Parameters:
            conn_name: Name of Pool or Engine
            query: The SQL query to run

        Returns:
            A pandas DataFrame whose columns are named after the cursor description

        Raises:
            TypeError: If the object configured for the connection name is neither a Pool nor an Engine
        """
        connector = self.get_connection_pool(conn_name)
        if isinstance(connector, Pool):
            conn = connector.connect()
        elif isinstance(connector, Engine):
            conn = connector.raw_connection()
        else:
            raise TypeError(f'Type for connection name "{conn_name}" not supported')

        try:
            cur = conn.cursor()
            try:
                cur.execute(query)
                df = pd.DataFrame(data=cur.fetchall(), columns=[x[0] for x in cur.description])
            finally:
                # Close the cursor explicitly instead of relying on garbage collection to release it
                cur.close()
        finally:
            conn.close()

        return df

    def _dispose(self) -> None:
        """
        Disposes of all the connection pools in this ConnectionSet
        """
        for pool in self._conn_pools.values():
            pool.dispose()
79
-
80
-
81
def _from_file(manifest: mf.Manifest) -> ConnectionSet:
    """
    Takes the DB Connections from both the squirrels.yaml and connections.py files and merges them
    into a single ConnectionSet. When a connection name exists in both, the one from the python
    file is used.

    Parameters:
        manifest: The object of Manifest class, the interface for the squirrels.yaml file

    Returns:
        A ConnectionSet with the DB connections from both squirrels.yaml and connections.py

    Raises:
        ConfigurationError: If the "main" function of the connections file raises any exception
    """
    # Imported locally since it is only needed here
    import importlib.util

    connections = manifest.get_db_connections()
    try:
        # Loader.load_module() is deprecated; build the module from a spec and execute it instead
        loader = SourceFileLoader(c.CONNECTIONS_FILE, c.CONNECTIONS_FILE)
        spec = importlib.util.spec_from_loader(loader.name, loader)
        module = importlib.util.module_from_spec(spec)
        loader.exec_module(module)
    except FileNotFoundError:
        # the connections file is optional
        module = None

    if module is not None:
        proj_vars = manifest.get_proj_vars()
        try:
            conn_from_py_file = module.main(proj_vars)
        except Exception as e:
            raise ConfigurationError(f'Error in the {c.CONNECTIONS_FILE} file') from e
    else:
        conn_from_py_file = {}
    return ConnectionSet({**connections, **conn_from_py_file})
107
-
108
-
109
def sqldf(query: str, df_by_db_views: Dict[str, pd.DataFrame]) -> pd.DataFrame:
    """
    Loads each dataframe as a table of a temporary in-memory sqlite database, then runs the SQL query on it

    Parameters:
        query: The SQL query to run using sqlite
        df_by_db_views: A dictionary of table names to their pandas Dataframe

    Returns:
        The result as a pandas Dataframe from running the query
    """
    memory_conn = sqlite3.connect(":memory:")
    try:
        # each dataframe becomes a table named after its dictionary key (the index is not written)
        for table_name, frame in df_by_db_views.items():
            frame.to_sql(table_name, memory_conn, index=False)
        result = pd.read_sql(query, memory_conn)
    finally:
        memory_conn.close()
    return result
@@ -1,4 +0,0 @@
1
- __pycache__
2
- /outputs
3
- /modules
4
- /venv
@@ -1,21 +0,0 @@
1
- from typing import Dict, Union, Any
2
- from sqlalchemy import create_engine, Engine, Pool, QueuePool
3
-
4
- from squirrels import get_credential
5
-
6
-
7
- # Note: all connections must be shareable across multiple thread. No writes will occur on them
8
def main(proj: Dict[str, Any], *p_args, **kwargs) -> Dict[str, Union[Engine, Pool]]:
    """
    Creates the database connections of the project.

    Parameters:
        proj: Dictionary of project-level values (not used by this example)

    Returns:
        A dictionary of connection name to the sqlalchemy Engine or Pool. This example returns a
        single sqlite engine under the name "default".
    """
    connections: Dict[str, Union[Engine, Pool]] = {}

    # ## Example of getting the username and password set with "$ squirrels set-credential [key]"
    # cred = get_credential('my_key')
    # # Use cred.username and cred.password to access the username and password

    # Create a connection pool / engine
    connections['default'] = create_engine('sqlite:///./database/sample_database.db')

    # ## Example of using QueuePool instead for a custom db connector:
    # connection_creator = lambda: sqlite3.connect('./database/sample_database.db', check_same_thread=False)
    # connections['default'] = QueuePool(connection_creator)

    return connections
@@ -1,8 +0,0 @@
1
- from typing import Dict, Any
2
- import squirrels as sr
3
-
4
-
5
def main(prms: Dict[str, sr.Parameter], args: Dict[str, Any], *p_args, **kwargs) -> Dict[str, Any]:
    """
    Builds the context dictionary for the dataset.

    Parameters:
        prms: Dictionary of parameter name to Parameter object; must contain "upper_bound"
        args: Dictionary of arguments (not used by this example)

    Returns:
        A dictionary with key "limit" set to the selected value of the "upper_bound" parameter
    """
    upper_bound: sr.NumberParameter = prms['upper_bound']
    return {'limit': upper_bound.get_selected_value()}
@@ -1,23 +0,0 @@
1
- from typing import Dict, Any
2
- import pandas as pd
3
-
4
- import squirrels as sr
5
-
6
-
7
def main(connection_set: sr.ConnectionSet,
         prms: Dict[str, sr.Parameter], ctx: Dict[str, Any], args: Dict[str, Any],
         *p_args, **kwargs) -> pd.DataFrame:
    """
    Example database view written in python. It builds a hard-coded dataframe (no database is
    queried) and keeps the rows whose "metric1" is at most the selected value of "upper_bound".

    Parameters:
        connection_set: The ConnectionSet of the project (only used by the commented-out examples)
        prms: Dictionary of parameter name to Parameter object; must contain "upper_bound"
        ctx: The context dictionary (only used by the commented-out example)
        args: Dictionary of arguments (not used by this example)

    Returns:
        The filtered pandas DataFrame with columns "dim1", "metric1", and "metric2"
    """
    # pool = connection_set.get_connection_pool("default")
    # conn = pool.connect() # use this to get a DBAPI connection from a Pool or sqlalchemy connection from an Engine
    # conn = pool.raw_connection() # use this to get a DBAPI connection from an Engine

    sample_rows = {
        'dim1': ['a', 'b', 'c', 'd', 'e', 'f'],
        'metric1': [1, 2, 3, 4, 5, 6],
        'metric2': [2, 4, 5, 1, 7, 3]
    }
    df = pd.DataFrame(sample_rows)

    upper_bound: sr.NumberParameter = prms['upper_bound']
    limit = upper_bound.get_selected_value()
    # limit: str = ctx['limit'] # use this instead if context.py is defined

    filter_expr = f'metric1 <= {limit}'
    return df.query(filter_expr)
@@ -1,7 +0,0 @@
1
- -- %USE some_db -- TBA: this line is optional when connecting to the "default" db_connection
2
-
3
- -- note: if context.py is defined, you can use "ctx['limit']" instead of "prms['upper_bound'].get_selected_value()"
4
- SELECT dim1, avg(metric1) as metric1, avg(metric2) as metric2
5
- FROM fact_table
6
- WHERE metric1 <= {{ prms['upper_bound'].get_selected_value() }}
7
- GROUP BY dim1