squirrels 0.2.1 → 0.3.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of squirrels might be problematic. Click here for more details.

Files changed (48)
  1. squirrels/__init__.py +11 -4
  2. squirrels/_api_response_models.py +118 -0
  3. squirrels/_api_server.py +140 -75
  4. squirrels/_authenticator.py +10 -8
  5. squirrels/_command_line.py +17 -11
  6. squirrels/_connection_set.py +2 -2
  7. squirrels/_constants.py +13 -5
  8. squirrels/_initializer.py +23 -13
  9. squirrels/_manifest.py +20 -10
  10. squirrels/_models.py +303 -148
  11. squirrels/_parameter_configs.py +195 -57
  12. squirrels/_parameter_sets.py +14 -17
  13. squirrels/_py_module.py +2 -4
  14. squirrels/_seeds.py +38 -0
  15. squirrels/_utils.py +41 -33
  16. squirrels/arguments/run_time_args.py +76 -34
  17. squirrels/data_sources.py +172 -51
  18. squirrels/dateutils.py +3 -3
  19. squirrels/package_data/assets/index.js +14 -14
  20. squirrels/package_data/base_project/connections.yml +1 -1
  21. squirrels/package_data/base_project/database/expenses.db +0 -0
  22. squirrels/package_data/base_project/docker/Dockerfile +1 -1
  23. squirrels/package_data/base_project/environcfg.yml +7 -7
  24. squirrels/package_data/base_project/models/dbviews/database_view1.py +25 -14
  25. squirrels/package_data/base_project/models/dbviews/database_view1.sql +21 -14
  26. squirrels/package_data/base_project/models/federates/dataset_example.py +6 -5
  27. squirrels/package_data/base_project/models/federates/dataset_example.sql +1 -1
  28. squirrels/package_data/base_project/parameters.yml +57 -28
  29. squirrels/package_data/base_project/pyconfigs/auth.py +11 -10
  30. squirrels/package_data/base_project/pyconfigs/connections.py +6 -8
  31. squirrels/package_data/base_project/pyconfigs/context.py +49 -33
  32. squirrels/package_data/base_project/pyconfigs/parameters.py +62 -30
  33. squirrels/package_data/base_project/seeds/seed_categories.csv +6 -0
  34. squirrels/package_data/base_project/seeds/seed_subcategories.csv +15 -0
  35. squirrels/package_data/base_project/squirrels.yml.j2 +37 -20
  36. squirrels/parameter_options.py +30 -10
  37. squirrels/parameters.py +300 -70
  38. squirrels/user_base.py +3 -13
  39. squirrels-0.3.0.dist-info/LICENSE +201 -0
  40. {squirrels-0.2.1.dist-info → squirrels-0.3.0.dist-info}/METADATA +15 -15
  41. squirrels-0.3.0.dist-info/RECORD +56 -0
  42. {squirrels-0.2.1.dist-info → squirrels-0.3.0.dist-info}/WHEEL +1 -1
  43. squirrels/package_data/base_project/seeds/mocks/category.csv +0 -3
  44. squirrels/package_data/base_project/seeds/mocks/max_filter.csv +0 -2
  45. squirrels/package_data/base_project/seeds/mocks/subcategory.csv +0 -6
  46. squirrels-0.2.1.dist-info/LICENSE +0 -22
  47. squirrels-0.2.1.dist-info/RECORD +0 -55
  48. {squirrels-0.2.1.dist-info → squirrels-0.3.0.dist-info}/entry_points.txt +0 -0
squirrels/_utils.py CHANGED
@@ -1,7 +1,6 @@
1
- from typing import Sequence, Optional, Union, Any, TypeVar, Callable
1
+ from typing import Sequence, Optional, Union, TypeVar, Callable
2
2
  from pathlib import Path
3
- from pandas.api import types as pd_types
4
- import json, jinja2 as j2, pandas as pd
3
+ import json, sqlite3, jinja2 as j2, pandas as pd
5
4
 
6
5
  from . import _constants as c
7
6
 
@@ -24,7 +23,7 @@ class ConfigurationError(Exception):
24
23
 
25
24
  class FileExecutionError(ConfigurationError):
26
25
  def __init__(self, message: str, error: Exception, *args) -> None:
27
- new_message = message + f"\n... Produced error message `{error}` (see above for more details)"
26
+ new_message = message + f"\n... Produced error message `{error}` (scroll up for more details on handled exception)"
28
27
  super().__init__(new_message, *args)
29
28
 
30
29
 
@@ -45,7 +44,7 @@ def join_paths(*paths: FilePath) -> Path:
45
44
 
46
45
  _j2_env = j2.Environment(loader=j2.FileSystemLoader('.'))
47
46
 
48
- def render_string(raw_str: str, kwargs: dict) -> str:
47
+ def render_string(raw_str: str, **kwargs: dict) -> str:
49
48
  """
50
49
  Given a template string, render it with the given keyword arguments
51
50
 
@@ -112,33 +111,6 @@ def normalize_name_for_api(name: str) -> str:
112
111
  return name.replace('_', '-')
113
112
 
114
113
 
115
- def df_to_json0(df: pd.DataFrame, dimensions: list[str] = None) -> dict[str, Any]:
116
- """
117
- Convert a pandas DataFrame to the same JSON format that the dataset result API of Squirrels outputs.
118
-
119
- Parameters:
120
- df: The dataframe to convert into JSON
121
- dimensions: The list of declared dimensions. If None, all non-numeric columns are assumed as dimensions
122
-
123
- Returns:
124
- The JSON response of a Squirrels dataset result API
125
- """
126
- in_df_json = json.loads(df.to_json(orient='table', index=False))
127
- out_fields = []
128
- non_numeric_fields = []
129
- for in_column in in_df_json["schema"]["fields"]:
130
- col_name: str = in_column["name"]
131
- out_column = {"name": col_name, "type": in_column["type"]}
132
- out_fields.append(out_column)
133
-
134
- if not pd_types.is_numeric_dtype(df[col_name].dtype):
135
- non_numeric_fields.append(col_name)
136
-
137
- out_dimensions = non_numeric_fields if dimensions is None else dimensions
138
- out_schema = {"fields": out_fields, "dimensions": out_dimensions}
139
- return {"schema": out_schema, "data": in_df_json["data"]}
140
-
141
-
142
114
  def load_json_or_comma_delimited_str_as_list(input_str: Union[str, Sequence]) -> Sequence[str]:
143
115
  """
144
116
  Given a string, load it as a list either by json string or comma delimited value
@@ -183,6 +155,42 @@ def process_if_not_none(input_val: Optional[X], processor: Callable[[X], Y]) ->
183
155
  return processor(input_val)
184
156
 
185
157
 
186
- def use_duckdb():
158
+ def use_duckdb() -> bool:
159
+ """
160
+ Determines whether to use DuckDB instead of SQLite for embedded database
161
+
162
+ Returns:
163
+ A boolean
164
+ """
187
165
  from ._manifest import ManifestIO
188
166
  return (ManifestIO.obj.settings.get(c.IN_MEMORY_DB_SETTING, c.SQLITE) == c.DUCKDB)
167
+
168
+
169
+ def run_sql_on_dataframes(sql_query: str, dataframes: dict[str, pd.DataFrame]) -> pd.DataFrame:
170
+ """
171
+ Runs a SQL query against a collection of dataframes
172
+
173
+ Parameters:
174
+ sql_query: The SQL query to run
175
+ dataframes: A dictionary of table names to their pandas Dataframe
176
+
177
+ Returns:
178
+ The result as a pandas Dataframe from running the query
179
+ """
180
+ do_use_duckdb = use_duckdb()
181
+ if do_use_duckdb:
182
+ import duckdb
183
+ conn = duckdb.connect()
184
+ else:
185
+ conn = sqlite3.connect(":memory:")
186
+
187
+ try:
188
+ for name, df in dataframes.items():
189
+ if do_use_duckdb:
190
+ conn.execute(f"CREATE TABLE {name} AS FROM df")
191
+ else:
192
+ df.to_sql(name, conn, index=False)
193
+
194
+ return conn.execute(sql_query).df() if do_use_duckdb else pd.read_sql(sql_query, conn)
195
+ finally:
196
+ conn.close()
squirrels/arguments/run_time_args.py CHANGED
@@ -1,11 +1,11 @@
1
- from typing import Callable, Any
1
+ from typing import Union, Callable, Optional, Any
2
2
  from dataclasses import dataclass
3
3
  from sqlalchemy import Engine
4
- import pandas as pd, sqlite3
4
+ import pandas as pd
5
5
 
6
6
  from .init_time_args import ConnectionsArgs, ParametersArgs
7
7
  from ..user_base import User
8
- from ..parameters import Parameter
8
+ from ..parameters import Parameter, _TextValue
9
9
  from .._connection_set import ConnectionSetIO
10
10
  from .. import _utils as u
11
11
 
@@ -22,6 +22,32 @@ class ContextArgs(ParametersArgs):
22
22
  user: User
23
23
  prms: dict[str, Parameter]
24
24
  traits: dict[str, Any]
25
+ _placeholders: dict[str, Any]
26
+
27
+ def set_placeholder(self, placeholder: str, value: Union[_TextValue, Any]) -> None:
28
+ """
29
+ Method to set a placeholder value.
30
+
31
+ Parameters:
32
+ placeholder: A string for the name of the placeholder
33
+ value: The value of the placeholder. Can be of any type
34
+ """
35
+ if isinstance(value, _TextValue):
36
+ value = value._value_do_not_touch
37
+ self._placeholders[placeholder] = value
38
+
39
+ def param_exists(self, param_name: str) -> bool:
40
+ """
41
+ Method to check whether a given parameter exists and is enabled (i.e., not hidden based on other parameter selections) for the current
42
+ dataset at runtime.
43
+
44
+ Parameters:
45
+ param_name: A string for the name of the parameter
46
+
47
+ Returns:
48
+ A boolean for whether the parameter exists
49
+ """
50
+ return (param_name in self.prms and self.prms[param_name].is_enabled())
25
51
 
26
52
 
27
53
  @dataclass
@@ -32,12 +58,43 @@ class ModelDepsArgs(ContextArgs):
32
58
  @dataclass
33
59
  class ModelArgs(ModelDepsArgs):
34
60
  connection_name: str
35
- connections: dict[str, Engine]
61
+ _connections: dict[str, Engine]
62
+ _dependencies: set[str]
36
63
  _ref: Callable[[str], pd.DataFrame]
37
- dependencies: set[str]
38
64
 
39
- def __post_init__(self):
40
- self.ref = self._ref
65
+ @property
66
+ def connections(self) -> dict[str, Engine]:
67
+ return self._connections.copy()
68
+
69
+ @property
70
+ def dependencies(self) -> set[str]:
71
+ return self._dependencies.copy()
72
+
73
+ def is_placeholder(self, placeholder: str) -> bool:
74
+ """
75
+ Checks whether a name is a valid placeholder
76
+
77
+ Parameters:
78
+ placeholder: A string for the name of the placeholder
79
+
80
+ Returns:
81
+ A boolean for whether name exists
82
+ """
83
+ return placeholder in self._placeholders
84
+
85
+ def get_placeholder_value(self, placeholder: str) -> Optional[Any]:
86
+ """
87
+ Gets the value of a placeholder.
88
+
89
+ USE WITH CAUTION. Do not use the return value directly in a SQL query since that could be prone to SQL injection
90
+
91
+ Parameters:
92
+ placeholder: A string for the name of the placeholder
93
+
94
+ Returns:
95
+ An type for the value of the placeholder
96
+ """
97
+ return self._placeholders.get(placeholder)
41
98
 
42
99
  def ref(self, model: str) -> pd.DataFrame:
43
100
  """
@@ -52,49 +109,34 @@ class ModelArgs(ModelDepsArgs):
52
109
  Returns:
53
110
  A pandas DataFrame
54
111
  """
112
+ return self._ref(model)
55
113
 
56
- def run_external_sql(self, sql: str, *, connection_name: str = None, **kwargs) -> pd.DataFrame:
114
+ def run_external_sql(self, sql_query: str, *, connection_name: str = None, **kwargs) -> pd.DataFrame:
57
115
  """
58
- Runs a SQL query against an external database, with option to specify the connection name
116
+ Runs a SQL query against an external database, with option to specify the connection name. Placeholder values are provided automatically
59
117
 
60
118
  Parameters:
61
- sql: The SQL query
119
+ sql_query: The SQL query. Can be parameterized with placeholders
62
120
  connection_name: The connection name for the database. If None, uses the one configured for the model
63
121
 
64
122
  Returns:
65
123
  The query result as a pandas DataFrame
66
124
  """
67
125
  connection_name = self.connection_name if connection_name is None else connection_name
68
- return ConnectionSetIO.obj.run_sql_query_from_conn_name(sql, connection_name)
126
+ return ConnectionSetIO.obj.run_sql_query_from_conn_name(sql_query, connection_name, self._placeholders)
69
127
 
70
- def run_sql_on_dataframes(self, query: str, *, dataframes: dict[str, pd.DataFrame] = None, **kwargs) -> pd.DataFrame:
128
+ def run_sql_on_dataframes(self, sql_query: str, *, dataframes: dict[str, pd.DataFrame] = None, **kwargs) -> pd.DataFrame:
71
129
  """
72
- Uses a dictionary of dataframes to execute a SQL query in an in-memory sqlite database
130
+ Uses a dictionary of dataframes to execute a SQL query in an embedded in-memory database (sqlite or duckdb based on setting)
73
131
 
74
132
  Parameters:
75
- query: The SQL query to run using sqlite
76
- dataframes: A dictionary of table names to their pandas Dataframe
133
+ sql_query: The SQL query to run
134
+ dataframes: A dictionary of table names to their pandas Dataframe. If None, uses results of dependent models
77
135
 
78
136
  Returns:
79
137
  The result as a pandas Dataframe from running the query
80
138
  """
81
139
  if dataframes is None:
82
- dataframes = {x: self.ref(x) for x in self.dependencies}
83
-
84
- use_duckdb = u.use_duckdb()
85
- if use_duckdb:
86
- import duckdb
87
- conn = duckdb.connect()
88
- else:
89
- conn = sqlite3.connect(":memory:")
90
-
91
- try:
92
- for name, df in dataframes.items():
93
- if use_duckdb:
94
- conn.execute(f"CREATE TABLE {name} AS FROM df")
95
- else:
96
- df.to_sql(name, conn, index=False)
97
-
98
- return conn.execute(query).df() if use_duckdb else pd.read_sql(query, conn)
99
- finally:
100
- conn.close()
140
+ dataframes = {x: self.ref(x) for x in self._dependencies}
141
+
142
+ return u.run_sql_on_dataframes(sql_query, dataframes)