squirrels 0.3.2__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of squirrels might be problematic. (The original page links to more details here.)

Files changed (56)
  1. squirrels/__init__.py +7 -3
  2. squirrels/_api_response_models.py +96 -72
  3. squirrels/_api_server.py +375 -201
  4. squirrels/_authenticator.py +23 -22
  5. squirrels/_command_line.py +70 -46
  6. squirrels/_connection_set.py +23 -25
  7. squirrels/_constants.py +29 -78
  8. squirrels/_dashboards_io.py +61 -0
  9. squirrels/_environcfg.py +53 -50
  10. squirrels/_initializer.py +184 -141
  11. squirrels/_manifest.py +168 -195
  12. squirrels/_models.py +159 -292
  13. squirrels/_package_loader.py +7 -8
  14. squirrels/_parameter_configs.py +173 -141
  15. squirrels/_parameter_sets.py +49 -38
  16. squirrels/_py_module.py +7 -7
  17. squirrels/_seeds.py +13 -12
  18. squirrels/_utils.py +114 -54
  19. squirrels/_version.py +1 -1
  20. squirrels/arguments/init_time_args.py +16 -10
  21. squirrels/arguments/run_time_args.py +89 -24
  22. squirrels/dashboards.py +82 -0
  23. squirrels/data_sources.py +212 -232
  24. squirrels/dateutils.py +29 -26
  25. squirrels/package_data/assets/index.css +1 -1
  26. squirrels/package_data/assets/index.js +27 -18
  27. squirrels/package_data/base_project/.gitignore +2 -2
  28. squirrels/package_data/base_project/connections.yml +1 -1
  29. squirrels/package_data/base_project/dashboards/dashboard_example.py +32 -0
  30. squirrels/package_data/base_project/dashboards.yml +10 -0
  31. squirrels/package_data/base_project/docker/.dockerignore +9 -4
  32. squirrels/package_data/base_project/docker/Dockerfile +7 -6
  33. squirrels/package_data/base_project/docker/compose.yml +1 -1
  34. squirrels/package_data/base_project/env.yml +2 -2
  35. squirrels/package_data/base_project/models/dbviews/{database_view1.py → dbview_example.py} +2 -1
  36. squirrels/package_data/base_project/models/dbviews/{database_view1.sql → dbview_example.sql} +3 -2
  37. squirrels/package_data/base_project/models/federates/{dataset_example.py → federate_example.py} +6 -6
  38. squirrels/package_data/base_project/models/federates/{dataset_example.sql → federate_example.sql} +1 -1
  39. squirrels/package_data/base_project/parameters.yml +6 -4
  40. squirrels/package_data/base_project/pyconfigs/auth.py +1 -1
  41. squirrels/package_data/base_project/pyconfigs/connections.py +1 -1
  42. squirrels/package_data/base_project/pyconfigs/context.py +38 -10
  43. squirrels/package_data/base_project/pyconfigs/parameters.py +15 -7
  44. squirrels/package_data/base_project/squirrels.yml.j2 +14 -7
  45. squirrels/package_data/templates/index.html +3 -3
  46. squirrels/parameter_options.py +103 -106
  47. squirrels/parameters.py +347 -195
  48. squirrels/project.py +378 -0
  49. squirrels/user_base.py +14 -6
  50. {squirrels-0.3.2.dist-info → squirrels-0.4.0.dist-info}/METADATA +12 -23
  51. squirrels-0.4.0.dist-info/RECORD +60 -0
  52. squirrels/_timer.py +0 -23
  53. squirrels-0.3.2.dist-info/RECORD +0 -56
  54. {squirrels-0.3.2.dist-info → squirrels-0.4.0.dist-info}/LICENSE +0 -0
  55. {squirrels-0.3.2.dist-info → squirrels-0.4.0.dist-info}/WHEEL +0 -0
  56. {squirrels-0.3.2.dist-info → squirrels-0.4.0.dist-info}/entry_points.txt +0 -0
squirrels/_parameter_sets.py CHANGED
@@ -2,15 +2,14 @@ from __future__ import annotations
2
2
  from typing import Optional, Sequence
3
3
  from dataclasses import dataclass, field
4
4
  from collections import OrderedDict
5
- import concurrent.futures, pandas as pd
5
+ import time, concurrent.futures, pandas as pd
6
6
 
7
- from . import _utils as u, _constants as c, parameters as p, _parameter_configs as pc, _py_module as pm, _api_response_models as arm
7
+ from . import _utils as u, _constants as c, parameters as p, _parameter_configs as _pc, _py_module as pm, _api_response_models as arm
8
8
  from .arguments.init_time_args import ParametersArgs
9
- from ._manifest import ManifestIO, ParametersConfig
10
- from ._connection_set import ConnectionSetIO
11
- from ._seeds import SeedsIO
9
+ from ._manifest import ParametersConfig, ManifestConfig
10
+ from ._connection_set import ConnectionSet, ConnectionsArgs
11
+ from ._seeds import Seeds
12
12
  from .user_base import User
13
- from ._timer import timer, time
14
13
 
15
14
 
16
15
  @dataclass
@@ -31,25 +30,25 @@ class ParameterSet:
31
30
 
32
31
 
33
32
  @dataclass
34
- class _ParameterConfigsSet:
33
+ class ParameterConfigsSet:
35
34
  """
36
35
  Pool of parameter configs, can create multiple for unit testing purposes
37
36
  """
38
- _data: dict[str, pc.ParameterConfig] = field(default_factory=OrderedDict)
39
- _data_source_params: dict[str, pc.DataSourceParameterConfig] = field(default_factory=dict)
37
+ _data: dict[str, _pc.ParameterConfigBase] = field(default_factory=OrderedDict)
38
+ _data_source_params: dict[str, _pc.DataSourceParameterConfig] = field(default_factory=dict)
40
39
 
41
- def get(self, name: Optional[str]) -> Optional[pc.ParameterConfig]:
40
+ def get(self, name: Optional[str]) -> Optional[_pc.ParameterConfigBase]:
42
41
  try:
43
42
  return self._data[name] if name is not None else None
44
43
  except KeyError as e:
45
44
  raise u.ConfigurationError(f'Unable to find parameter named "{name}"') from e
46
45
 
47
- def add(self, param_config: pc.ParameterConfigBase) -> None:
46
+ def add(self, param_config: _pc.ParameterConfigBase) -> None:
48
47
  self._data[param_config.name] = param_config
49
- if isinstance(param_config, pc.DataSourceParameterConfig):
48
+ if isinstance(param_config, _pc.DataSourceParameterConfig):
50
49
  self._data_source_params[param_config.name] = param_config
51
50
 
52
- def _get_all_ds_param_configs(self) -> Sequence[pc.DataSourceParameterConfig]:
51
+ def _get_all_ds_param_configs(self) -> Sequence[_pc.DataSourceParameterConfig]:
53
52
  return list(self._data_source_params.values())
54
53
 
55
54
  def __convert_datasource_params(self, df_dict: dict[str, pd.DataFrame]) -> None:
@@ -60,11 +59,12 @@ class _ParameterConfigsSet:
60
59
  name = stack[-1]
61
60
  if name not in done:
62
61
  param = self._data_source_params.get(name, self.get(name))
62
+ assert param is not None
63
63
  parent_name = param.parent_name
64
64
  if parent_name is not None and parent_name not in done:
65
65
  stack.append(parent_name)
66
66
  continue
67
- if isinstance(param, pc.DataSourceParameterConfig):
67
+ if isinstance(param, _pc.DataSourceParameterConfig):
68
68
  if name not in df_dict:
69
69
  raise u.ConfigurationError(f'No reference data found for parameter "{name}"')
70
70
  self._data[name] = param.convert(df_dict[name])
@@ -73,12 +73,12 @@ class _ParameterConfigsSet:
73
73
 
74
74
  def __validate_param_relationships(self) -> None:
75
75
  for param_config in self._data.values():
76
- assert isinstance(param_config, pc.ParameterConfig)
76
+ assert isinstance(param_config, _pc.ParameterConfig)
77
77
  parent_name = param_config.parent_name
78
78
  parent = self.get(parent_name)
79
79
  if parent:
80
- if not isinstance(param_config, pc.SelectionParameterConfig):
81
- if not isinstance(parent, pc.SingleSelectParameterConfig):
80
+ if not isinstance(param_config, _pc.SelectionParameterConfig):
81
+ if not isinstance(parent, _pc.SingleSelectParameterConfig):
82
82
  raise u.ConfigurationError(f'Only single-select parameters can be parents of non-select parameters. ' +
83
83
  f'Parameter "{parent_name}" is the parent of non-select parameter ' +
84
84
  f'"{param_config.name}" but "{parent_name}" is not a single-select parameter.')
@@ -92,7 +92,7 @@ class _ParameterConfigsSet:
92
92
  f'among the options of non-select parameter "{param_config.name}".')
93
93
  seen.update(lookup_keys)
94
94
 
95
- if not isinstance(parent, pc.SelectionParameterConfig):
95
+ if not isinstance(parent, _pc.SelectionParameterConfig):
96
96
  raise u.ConfigurationError(f'Only selection parameters can be parents. Parameter "{parent_name}" is the parent of ' +
97
97
  f'"{param_config.name}" but "{parent_name}" is not a selection parameter.')
98
98
 
@@ -107,7 +107,7 @@ class _ParameterConfigsSet:
107
107
  *, updates_only: bool = False, request_version: Optional[int] = None
108
108
  ) -> ParameterSet:
109
109
  if dataset_params is None:
110
- dataset_params = self._data.keys()
110
+ dataset_params = list(self._data.keys())
111
111
 
112
112
  parameters_by_name: dict[str, p.Parameter] = {}
113
113
  params_to_process = selections.keys() if selections and updates_only else dataset_params
@@ -119,6 +119,7 @@ class _ParameterConfigsSet:
119
119
  children = []
120
120
  if curr_name not in parameters_by_name:
121
121
  param_conf = self.get(curr_name)
122
+ assert isinstance(param_conf, _pc.ParameterConfig)
122
123
  parent_name = param_conf.parent_name
123
124
  if parent_name is None:
124
125
  parent = None
@@ -127,9 +128,10 @@ class _ParameterConfigsSet:
127
128
  continue
128
129
  else:
129
130
  parent = parameters_by_name.get(parent_name)
131
+ assert isinstance(parent, p._SelectionParameter) or parent is None
130
132
  param = param_conf.with_selection(selections.get(curr_name), user, parent)
131
133
  parameters_by_name[curr_name] = param
132
- if isinstance(param_conf, pc.SelectionParameterConfig):
134
+ if isinstance(param_conf, _pc.SelectionParameterConfig):
133
135
  children = list(x for x in param_conf.children.keys() if x in dataset_params)
134
136
  stack.pop()
135
137
  stack.extend(children)
@@ -137,21 +139,24 @@ class _ParameterConfigsSet:
137
139
  ordered_parameters = OrderedDict((key, parameters_by_name[key]) for key in dataset_params if key in parameters_by_name)
138
140
  return ParameterSet(ordered_parameters)
139
141
 
140
- def get_all_api_field_info(self) -> dict[str, pc.APIParamFieldInfo]:
141
- return {param: config.get_api_field_info() for param, config in self._data.items()}
142
+ def get_all_api_field_info(self) -> dict[str, _pc.APIParamFieldInfo]:
143
+ api_field_infos = {}
144
+ for param, config in self._data.items():
145
+ assert isinstance(config, _pc.ParameterConfig)
146
+ api_field_infos[param] = config.get_api_field_info()
147
+ return api_field_infos
142
148
 
143
149
 
144
150
  class ParameterConfigsSetIO:
145
151
  """
146
- Static class for the singleton object of __ParameterConfigsPoolData
152
+ Static class for the singleton object of ParameterConfigsSet
147
153
  """
148
- args: ParametersArgs
149
- obj: _ParameterConfigsSet
154
+ obj: ParameterConfigsSet # this is static (set in load_from_file) to simplify development experience for pyconfigs/parameters.py
150
155
 
151
156
  @classmethod
152
- def _GetDfDictFromDataSources(cls) -> dict[str, pd.DataFrame]:
153
- def get_dataframe(ds_param_config: pc.DataSourceParameterConfig) -> tuple[str, pd.DataFrame]:
154
- return ds_param_config.name, ds_param_config.get_dataframe(ConnectionSetIO.obj, SeedsIO.obj)
157
+ def _get_df_dict_from_data_sources(cls, default_conn_name: str, seeds: Seeds, conn_set: ConnectionSet) -> dict[str, pd.DataFrame]:
158
+ def get_dataframe(ds_param_config: _pc.DataSourceParameterConfig) -> tuple[str, pd.DataFrame]:
159
+ return ds_param_config.name, ds_param_config.get_dataframe(default_conn_name, conn_set, seeds)
155
160
 
156
161
  ds_param_configs = cls.obj._get_all_ds_param_configs()
157
162
  with concurrent.futures.ThreadPoolExecutor() as executor:
@@ -160,24 +165,30 @@ class ParameterConfigsSetIO:
160
165
  return df_dict
161
166
 
162
167
  @classmethod
163
- def _AddFromDict(cls, param_config: ParametersConfig) -> None:
168
+ def _add_from_dict(cls, param_config: ParametersConfig) -> None:
164
169
  ptype = getattr(p, param_config.type)
165
170
  factory = getattr(ptype, param_config.factory)
166
171
  factory(**param_config.arguments)
167
172
 
168
173
  @classmethod
169
- def LoadFromFile(cls) -> None:
174
+ def get_param_args(cls, conn_args: ConnectionsArgs) -> ParametersArgs:
175
+ return ParametersArgs(conn_args.proj_vars, conn_args.env_vars)
176
+
177
+ @classmethod
178
+ def load_from_file(
179
+ cls, logger: u.Logger, base_path: str, manifest_cfg: ManifestConfig, seeds: Seeds, conn_set: ConnectionSet, param_args: ParametersArgs
180
+ ) -> ParameterConfigsSet:
170
181
  start = time.time()
171
- cls.obj = _ParameterConfigsSet()
182
+ cls.obj = ParameterConfigsSet()
172
183
 
173
- for param_as_dict in ManifestIO.obj.parameters:
174
- cls._AddFromDict(param_as_dict)
184
+ for param_as_dict in manifest_cfg.parameters:
185
+ cls._add_from_dict(param_as_dict)
175
186
 
176
- conn_args = ConnectionSetIO.args
177
- cls.args = ParametersArgs(conn_args.proj_vars, conn_args.env_vars)
178
- pm.run_pyconfig_main(c.PARAMETERS_FILE, {"sqrl": cls.args})
187
+ pm.run_pyconfig_main(base_path, c.PARAMETERS_FILE, {"sqrl": param_args})
179
188
 
180
- df_dict = cls._GetDfDictFromDataSources()
189
+ default_conn_name = manifest_cfg.settings_obj.get_default_connection_name()
190
+ df_dict = cls._get_df_dict_from_data_sources(default_conn_name, seeds, conn_set)
181
191
  cls.obj._post_process_params(df_dict)
182
192
 
183
- timer.add_activity_time("loading parameters", start)
193
+ logger.log_activity_time("loading parameters", start)
194
+ return cls.obj
squirrels/_py_module.py CHANGED
@@ -1,5 +1,4 @@
1
1
  from typing import Type, Optional, Any
2
- from types import ModuleType
3
2
  import importlib.util
4
3
 
5
4
  from . import _constants as c, _utils as u
@@ -10,25 +9,26 @@ class PyModule:
10
9
  """
11
10
  Constructor for PyModule, an abstract module for a file that may or may not exist
12
11
 
13
- Parameters:
12
+ Arguments:
14
13
  filepath (str | pathlib.Path): The file path to the python module
15
14
  is_required: If true, throw an error if the file path doesn't exist
16
15
  """
17
16
  self.filepath = str(filepath)
18
17
  try:
19
18
  spec = importlib.util.spec_from_file_location(self.filepath, self.filepath)
19
+ assert spec is not None and spec.loader is not None
20
20
  self.module = importlib.util.module_from_spec(spec)
21
21
  spec.loader.exec_module(self.module)
22
22
  except FileNotFoundError as e:
23
23
  if is_required:
24
24
  raise u.ConfigurationError(f"Required file not found: '{self.filepath}'") from e
25
- self.module: Optional[ModuleType] = default_class
25
+ self.module = default_class
26
26
 
27
27
  def get_func_or_class(self, attr_name: str, *, default_attr: Any = None, is_required: bool = True) -> Any:
28
28
  """
29
29
  Get an attribute of the module. Usually a python function or class.
30
30
 
31
- Parameters:
31
+ Arguments:
32
32
  attr_name: The attribute name
33
33
  default_attr: The default function or class to use if the attribute cannot be found
34
34
  is_required: If true, throw an error if the attribute cannot be found, unless default_attr is not None
@@ -42,15 +42,15 @@ class PyModule:
42
42
  return func_or_class
43
43
 
44
44
 
45
- def run_pyconfig_main(filename: str, kwargs: dict[str, Any] = {}) -> None:
45
+ def run_pyconfig_main(base_path: str, filename: str, kwargs: dict[str, Any] = {}) -> None:
46
46
  """
47
47
  Given a python file in the 'pyconfigs' folder, run its main function
48
48
 
49
- Parameters:
49
+ Arguments:
50
50
  filename: The name of the file to run main function
51
51
  kwargs: Dictionary of the main function arguments
52
52
  """
53
- filepath = u.join_paths(c.PYCONFIGS_FOLDER, filename)
53
+ filepath = u.Path(base_path, c.PYCONFIGS_FOLDER, filename)
54
54
  module = PyModule(filepath)
55
55
  main_function = module.get_func_or_class(c.MAIN_FUNC, is_required=False)
56
56
  if main_function:
squirrels/_seeds.py CHANGED
@@ -1,38 +1,39 @@
1
1
  from dataclasses import dataclass
2
- import os, glob, pandas as pd
2
+ import os, time, glob, pandas as pd
3
3
 
4
- from ._timer import timer, time
5
- from ._manifest import ManifestIO
6
- from . import _utils as u, _constants as c
4
+ from ._manifest import ManifestConfig
5
+ from . import _utils as _u, _constants as c
7
6
 
8
7
 
9
8
  @dataclass
10
9
  class Seeds:
11
10
  _data: dict[str, pd.DataFrame]
11
+ _manifest_cfg: ManifestConfig
12
12
 
13
13
  def run_query(self, sql_query: str) -> pd.DataFrame:
14
- return u.run_sql_on_dataframes(sql_query, self._data)
14
+ use_duckdb = self._manifest_cfg.settings_obj.do_use_duckdb()
15
+ return _u.run_sql_on_dataframes(sql_query, self._data, use_duckdb)
15
16
 
16
17
  def get_dataframes(self) -> dict[str, pd.DataFrame]:
17
18
  return self._data.copy()
18
19
 
19
20
 
20
21
  class SeedsIO:
21
- obj: Seeds
22
22
 
23
23
  @classmethod
24
- def LoadFiles(cls) -> None:
24
+ def load_files(cls, logger: _u.Logger, base_path: str, manifest_cfg: ManifestConfig) -> Seeds:
25
25
  start = time.time()
26
- infer_schema: bool = ManifestIO.obj.settings.get(c.SEEDS_INFER_SCHEMA_SETTING, True)
27
- na_values: list[str] = ManifestIO.obj.settings.get(c.SEEDS_NA_VALUES_SETTING, ["NA"])
26
+ infer_schema: bool = manifest_cfg.settings.get(c.SEEDS_INFER_SCHEMA_SETTING, True)
27
+ na_values: list[str] = manifest_cfg.settings.get(c.SEEDS_NA_VALUES_SETTING, ["NA"])
28
28
  csv_dtype = None if infer_schema else str
29
29
 
30
30
  seeds_dict = {}
31
- csv_files = glob.glob(os.path.join(c.SEEDS_FOLDER, '**/*.csv'), recursive=True)
31
+ csv_files = glob.glob(os.path.join(base_path, c.SEEDS_FOLDER, '**/*.csv'), recursive=True)
32
32
  for csv_file in csv_files:
33
33
  file_stem = os.path.splitext(os.path.basename(csv_file))[0]
34
34
  df = pd.read_csv(csv_file, dtype=csv_dtype, keep_default_na=False, na_values=na_values)
35
35
  seeds_dict[file_stem] = df
36
36
 
37
- cls.obj = Seeds(seeds_dict)
38
- timer.add_activity_time("loading seed files", start)
37
+ seeds = Seeds(seeds_dict, manifest_cfg)
38
+ logger.log_activity_time("loading seed files", start)
39
+ return seeds
squirrels/_utils.py CHANGED
@@ -1,6 +1,9 @@
1
1
  from typing import Sequence, Optional, Union, TypeVar, Callable
2
2
  from pathlib import Path
3
- import json, sqlite3, jinja2 as j2, pandas as pd
3
+ from pandas.api import types as pd_types
4
+ from datetime import datetime
5
+ import os, time, logging, json, sqlite3, pandas as pd
6
+ import jinja2 as j2, jinja2.nodes as j2_nodes
4
7
 
5
8
  from . import _constants as c
6
9
 
@@ -21,75 +24,114 @@ class ConfigurationError(Exception):
21
24
  """
22
25
  pass
23
26
 
24
- class FileExecutionError(ConfigurationError):
27
+ class FileExecutionError(Exception):
25
28
  def __init__(self, message: str, error: Exception, *args) -> None:
26
- new_message = message + f"\n... Produced error message `{error}` (scroll up for more details on handled exception)"
29
+ t = " "
30
+ new_message = f"\n" + message + f"\n{t}Produced error message:\n{t}{t}{error} (see above for more details on handled exception)"
27
31
  super().__init__(new_message, *args)
32
+ self.error = error
33
+
34
+
35
+ ## Other utility classes
36
+
37
+ class Logger(logging.Logger):
38
+ def log_activity_time(self, activity: str, start_timestamp: float, *, request_id: str | None = None) -> None:
39
+ end_timestamp = time.time()
40
+ time_taken = round((end_timestamp-start_timestamp) * 10**3, 3)
41
+ data = { "activity": activity, "start_timestamp": start_timestamp, "end_timestamp": end_timestamp, "time_taken_ms": time_taken }
42
+ info = { "request_id": request_id } if request_id else {}
43
+ self.debug(f'Time taken for "{activity}": {time_taken}ms', extra={"data": data, "info": info})
44
+
45
+
46
+ class EnvironmentWithMacros(j2.Environment):
47
+ def __init__(self, logger: logging.Logger, loader: j2.FileSystemLoader, *args, **kwargs):
48
+ super().__init__(*args, loader=loader, **kwargs)
49
+ self._logger = logger
50
+ self._macros = self._load_macro_templates(logger)
51
+
52
+ def _load_macro_templates(self, logger: logging.Logger) -> str:
53
+ macros_dirs = self._get_macro_folders_from_packages()
54
+ macro_templates = []
55
+ for macros_dir in macros_dirs:
56
+ for root, _, files in os.walk(macros_dir):
57
+ files: list[str]
58
+ for filename in files:
59
+ if any(filename.endswith(x) for x in [".sql", ".j2", ".jinja", ".jinja2"]):
60
+ filepath = Path(root, filename)
61
+ logger.info(f"Loaded macros from: {filepath}")
62
+ with open(filepath, 'r') as f:
63
+ content = f.read()
64
+ macro_templates.append(content)
65
+ return '\n'.join(macro_templates)
66
+
67
+ def _get_macro_folders_from_packages(self) -> list[Path]:
68
+ assert isinstance(self.loader, j2.FileSystemLoader)
69
+ packages_folder = Path(self.loader.searchpath[0], c.PACKAGES_FOLDER)
70
+
71
+ subdirectories = []
72
+ if os.path.exists(packages_folder):
73
+ for item in os.listdir(packages_folder):
74
+ item_path = Path(packages_folder, item)
75
+ if os.path.isdir(item_path):
76
+ subdirectories.append(Path(item_path, c.MACROS_FOLDER))
77
+
78
+ subdirectories.append(Path(self.loader.searchpath[0], c.MACROS_FOLDER))
79
+ return subdirectories
28
80
 
81
+ def _parse(self, source: str, name: str | None, filename: str | None) -> j2_nodes.Template:
82
+ source = self._macros + source
83
+ return super()._parse(source, name, filename)
29
84
 
30
- ## Utility functions/variables
31
-
32
- def join_paths(*paths: FilePath) -> Path:
33
- """
34
- Joins paths together.
35
-
36
- Parameters:
37
- paths (str | pathlib.Path): The paths to join.
38
85
 
39
- Returns:
40
- (pathlib.Path) The joined path.
41
- """
42
- return Path(*paths)
86
+ ## Utility functions/variables
43
87
 
88
+ def log_activity_time(logger: logging.Logger, activity: str, start_timestamp: float, *, request_id: str | None = None) -> None:
89
+ end_timestamp = time.time()
90
+ time_taken = round((end_timestamp-start_timestamp) * 10**3, 3)
91
+ data = { "activity": activity, "start_timestamp": start_timestamp, "end_timestamp": end_timestamp, "time_taken_ms": time_taken }
92
+ info = { "request_id": request_id } if request_id else {}
93
+ logger.debug(f'Time taken for "{activity}": {time_taken}ms', extra={"data": data, "info": info})
44
94
 
45
- _j2_env = j2.Environment(loader=j2.FileSystemLoader('.'))
46
95
 
47
- def render_string(raw_str: str, **kwargs: dict) -> str:
96
+ def render_string(raw_str: str, *, base_path: str = ".", **kwargs) -> str:
48
97
  """
49
98
  Given a template string, render it with the given keyword arguments
50
99
 
51
- Parameters:
100
+ Arguments:
52
101
  raw_str: The template string
53
102
  kwargs: The keyword arguments
54
103
 
55
104
  Returns:
56
105
  The rendered string
57
106
  """
58
- template = _j2_env.from_string(raw_str)
107
+ j2_env = j2.Environment(loader=j2.FileSystemLoader(base_path))
108
+ template = j2_env.from_string(raw_str)
59
109
  return template.render(kwargs)
60
110
 
61
111
 
62
- T = TypeVar('T')
63
- def __process_file_handler(file_handler: Callable[[FilePath], T], filepath: FilePath, is_required: bool) -> Optional[T]:
64
- try:
65
- return file_handler(filepath)
66
- except FileNotFoundError as e:
67
- if is_required:
68
- raise ConfigurationError(f"Required file not found: '{str(filepath)}'") from e
69
-
70
-
71
- def read_file(filepath: FilePath, *, is_required: bool = True) -> Optional[str]:
112
+ def read_file(filepath: FilePath) -> str:
72
113
  """
73
114
  Reads a file and return its content if required
74
115
 
75
- Parameters:
116
+ Arguments:
76
117
  filepath (str | pathlib.Path): The path to the file to read
77
118
  is_required: If true, throw error if file doesn't exist
78
119
 
79
120
  Returns:
80
121
  Content of the file, or None if doesn't exist and not required
81
122
  """
82
- def file_handler(filepath: FilePath):
123
+ try:
83
124
  with open(filepath, 'r') as f:
84
125
  return f.read()
85
- return __process_file_handler(file_handler, filepath, is_required)
126
+ except FileNotFoundError as e:
127
+ raise ConfigurationError(f"Required file not found: '{str(filepath)}'") from e
86
128
 
87
129
 
88
130
  def normalize_name(name: str) -> str:
89
131
  """
90
132
  Normalizes names to the convention of the squirrels manifest file.
91
133
 
92
- Parameters:
134
+ Arguments:
93
135
  name: The name to normalize.
94
136
 
95
137
  Returns:
@@ -102,7 +144,7 @@ def normalize_name_for_api(name: str) -> str:
102
144
  """
103
145
  Normalizes names to the REST API convention.
104
146
 
105
- Parameters:
147
+ Arguments:
106
148
  name: The name to normalize.
107
149
 
108
150
  Returns:
@@ -115,7 +157,7 @@ def load_json_or_comma_delimited_str_as_list(input_str: Union[str, Sequence]) ->
115
157
  """
116
158
  Given a string, load it as a list either by json string or comma delimited value
117
159
 
118
- Parameters:
160
+ Arguments:
119
161
  input_str: The input string
120
162
 
121
163
  Returns:
@@ -143,7 +185,7 @@ def process_if_not_none(input_val: Optional[X], processor: Callable[[X], Y]) ->
143
185
  """
144
186
  Given a input value and a function that processes the value, return the output of the function unless input is None
145
187
 
146
- Parameters:
188
+ Arguments:
147
189
  input_val: The input value
148
190
  processor: The function that processes the input value
149
191
 
@@ -155,42 +197,60 @@ def process_if_not_none(input_val: Optional[X], processor: Callable[[X], Y]) ->
155
197
  return processor(input_val)
156
198
 
157
199
 
158
- def use_duckdb() -> bool:
159
- """
160
- Determines whether to use DuckDB instead of SQLite for embedded database
161
-
162
- Returns:
163
- A boolean
164
- """
165
- from ._manifest import ManifestIO
166
- return (ManifestIO.obj.settings.get(c.IN_MEMORY_DB_SETTING, c.SQLITE) == c.DUCKDB)
167
-
168
-
169
- def run_sql_on_dataframes(sql_query: str, dataframes: dict[str, pd.DataFrame]) -> pd.DataFrame:
200
+ def run_sql_on_dataframes(sql_query: str, dataframes: dict[str, pd.DataFrame], do_use_duckdb: bool) -> pd.DataFrame:
170
201
  """
171
202
  Runs a SQL query against a collection of dataframes
172
203
 
173
- Parameters:
204
+ Arguments:
174
205
  sql_query: The SQL query to run
175
206
  dataframes: A dictionary of table names to their pandas Dataframe
176
207
 
177
208
  Returns:
178
209
  The result as a pandas Dataframe from running the query
179
210
  """
180
- do_use_duckdb = use_duckdb()
181
211
  if do_use_duckdb:
182
212
  import duckdb
183
- conn = duckdb.connect()
213
+ duckdb_conn = duckdb.connect()
184
214
  else:
185
215
  conn = sqlite3.connect(":memory:")
186
216
 
187
217
  try:
188
218
  for name, df in dataframes.items():
189
219
  if do_use_duckdb:
190
- conn.execute(f"CREATE TABLE {name} AS FROM df")
220
+ duckdb_conn.execute(f"CREATE TABLE {name} AS FROM df")
191
221
  else:
192
222
  df.to_sql(name, conn, index=False)
193
223
 
194
- return conn.execute(sql_query).df() if do_use_duckdb else pd.read_sql(sql_query, conn)
224
+ return duckdb_conn.execute(sql_query).df() if do_use_duckdb else pd.read_sql(sql_query, conn)
195
225
  finally:
196
- conn.close()
226
+ duckdb_conn.close() if do_use_duckdb else conn.close()
227
+
228
+
229
+ def df_to_json0(df: pd.DataFrame, dimensions: list[str] | None = None) -> dict:
230
+ """
231
+ Convert a pandas DataFrame to the response format that the dataset result API of Squirrels outputs.
232
+
233
+ Arguments:
234
+ df: The dataframe to convert into an API response
235
+ dimensions: The list of declared dimensions. If None, all non-numeric columns are assumed as dimensions
236
+
237
+ Returns:
238
+ The response of a Squirrels dataset result API
239
+ """
240
+ in_df_json = json.loads(df.to_json(orient='table', index=False))
241
+ out_fields = []
242
+ non_numeric_fields = []
243
+ for in_column in in_df_json["schema"]["fields"]:
244
+ col_name: str = in_column["name"]
245
+ out_column = { "name": col_name, "type": in_column["type"] }
246
+ out_fields.append(out_column)
247
+
248
+ if not pd_types.is_numeric_dtype(df[col_name].dtype):
249
+ non_numeric_fields.append(col_name)
250
+
251
+ out_dimensions = non_numeric_fields if dimensions is None else dimensions
252
+ dataset_json = {
253
+ "schema": { "fields": out_fields, "dimensions": out_dimensions },
254
+ "data": in_df_json["data"]
255
+ }
256
+ return dataset_json
squirrels/_version.py CHANGED
@@ -1,3 +1,3 @@
1
- from . import __version__
1
+ __version__ = '0.4.0'
2
2
 
3
3
  sq_major_version, sq_minor_version, sq_patch_version = __version__.split('.')[:3]
squirrels/arguments/init_time_args.py CHANGED
@@ -1,32 +1,38 @@
1
- from typing import Optional, Callable, Any
1
+ from typing import Callable, Any
2
2
  from dataclasses import dataclass
3
3
 
4
4
 
5
5
  @dataclass
6
6
  class BaseArguments:
7
- proj_vars: dict[str, Any]
8
- env_vars: dict[str, Any]
7
+ _proj_vars: dict[str, Any]
8
+ _env_vars: dict[str, Any]
9
+
10
+ @property
11
+ def proj_vars(self) -> dict[str, Any]:
12
+ return self._proj_vars.copy()
13
+
14
+ @property
15
+ def env_vars(self) -> dict[str, Any]:
16
+ return self._env_vars.copy()
9
17
 
10
18
 
11
19
  @dataclass
12
20
  class ConnectionsArgs(BaseArguments):
13
- _get_credential: Callable[[str], tuple[str, str]]
14
-
15
- def __post_init__(self):
16
- self.get_credential = self._get_credential
21
+ _get_credential: Callable[[str | None], tuple[str, str]]
17
22
 
18
- def get_credential(self, key: Optional[str]) -> tuple[str, str]:
23
+ def get_credential(self, key: str | None) -> tuple[str, str]:
19
24
  """
20
- Return (username, password) tuple configured for credentials key in environcfg.yaml
25
+ Return (username, password) tuple configured for credentials key in env.yaml
21
26
 
22
27
  If key is None, returns tuple of empty strings ("", "")
23
28
 
24
- Parameters:
29
+ Arguments:
25
30
  key: The credentials key
26
31
 
27
32
  Returns:
28
33
  A tuple of 2 strings
29
34
  """
35
+ return self._get_credential(key)
30
36
 
31
37
 
32
38
  @dataclass