squirrels 0.2.2__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of squirrels might be problematic. Click here for more details.
- squirrels/__init__.py +11 -4
- squirrels/_api_response_models.py +118 -0
- squirrels/_api_server.py +140 -75
- squirrels/_authenticator.py +10 -8
- squirrels/_command_line.py +17 -11
- squirrels/_connection_set.py +2 -2
- squirrels/_constants.py +13 -5
- squirrels/_initializer.py +23 -13
- squirrels/_manifest.py +20 -10
- squirrels/_models.py +295 -142
- squirrels/_parameter_configs.py +195 -57
- squirrels/_parameter_sets.py +14 -17
- squirrels/_py_module.py +2 -4
- squirrels/_seeds.py +38 -0
- squirrels/_utils.py +41 -33
- squirrels/arguments/run_time_args.py +76 -34
- squirrels/data_sources.py +172 -51
- squirrels/dateutils.py +3 -3
- squirrels/package_data/assets/index.js +14 -14
- squirrels/package_data/base_project/connections.yml +1 -1
- squirrels/package_data/base_project/database/expenses.db +0 -0
- squirrels/package_data/base_project/docker/Dockerfile +1 -1
- squirrels/package_data/base_project/environcfg.yml +7 -7
- squirrels/package_data/base_project/models/dbviews/database_view1.py +25 -14
- squirrels/package_data/base_project/models/dbviews/database_view1.sql +21 -14
- squirrels/package_data/base_project/models/federates/dataset_example.py +6 -5
- squirrels/package_data/base_project/models/federates/dataset_example.sql +1 -1
- squirrels/package_data/base_project/parameters.yml +57 -28
- squirrels/package_data/base_project/pyconfigs/auth.py +11 -10
- squirrels/package_data/base_project/pyconfigs/connections.py +6 -8
- squirrels/package_data/base_project/pyconfigs/context.py +49 -33
- squirrels/package_data/base_project/pyconfigs/parameters.py +62 -30
- squirrels/package_data/base_project/seeds/seed_categories.csv +6 -0
- squirrels/package_data/base_project/seeds/seed_subcategories.csv +15 -0
- squirrels/package_data/base_project/squirrels.yml.j2 +37 -20
- squirrels/parameter_options.py +30 -10
- squirrels/parameters.py +300 -70
- squirrels/user_base.py +3 -13
- squirrels-0.3.0.dist-info/LICENSE +201 -0
- {squirrels-0.2.2.dist-info → squirrels-0.3.0.dist-info}/METADATA +15 -15
- squirrels-0.3.0.dist-info/RECORD +56 -0
- squirrels/package_data/base_project/seeds/mocks/category.csv +0 -3
- squirrels/package_data/base_project/seeds/mocks/max_filter.csv +0 -2
- squirrels/package_data/base_project/seeds/mocks/subcategory.csv +0 -6
- squirrels-0.2.2.dist-info/LICENSE +0 -22
- squirrels-0.2.2.dist-info/RECORD +0 -55
- {squirrels-0.2.2.dist-info → squirrels-0.3.0.dist-info}/WHEEL +0 -0
- {squirrels-0.2.2.dist-info → squirrels-0.3.0.dist-info}/entry_points.txt +0 -0
squirrels/_utils.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
|
-
from typing import Sequence, Optional, Union,
|
|
1
|
+
from typing import Sequence, Optional, Union, TypeVar, Callable
|
|
2
2
|
from pathlib import Path
|
|
3
|
-
|
|
4
|
-
import json, jinja2 as j2, pandas as pd
|
|
3
|
+
import json, sqlite3, jinja2 as j2, pandas as pd
|
|
5
4
|
|
|
6
5
|
from . import _constants as c
|
|
7
6
|
|
|
@@ -24,7 +23,7 @@ class ConfigurationError(Exception):
|
|
|
24
23
|
|
|
25
24
|
class FileExecutionError(ConfigurationError):
|
|
26
25
|
def __init__(self, message: str, error: Exception, *args) -> None:
|
|
27
|
-
new_message = message + f"\n... Produced error message `{error}` (
|
|
26
|
+
new_message = message + f"\n... Produced error message `{error}` (scroll up for more details on handled exception)"
|
|
28
27
|
super().__init__(new_message, *args)
|
|
29
28
|
|
|
30
29
|
|
|
@@ -45,7 +44,7 @@ def join_paths(*paths: FilePath) -> Path:
|
|
|
45
44
|
|
|
46
45
|
_j2_env = j2.Environment(loader=j2.FileSystemLoader('.'))
|
|
47
46
|
|
|
48
|
-
def render_string(raw_str: str, kwargs: dict) -> str:
|
|
47
|
+
def render_string(raw_str: str, **kwargs: dict) -> str:
|
|
49
48
|
"""
|
|
50
49
|
Given a template string, render it with the given keyword arguments
|
|
51
50
|
|
|
@@ -112,33 +111,6 @@ def normalize_name_for_api(name: str) -> str:
|
|
|
112
111
|
return name.replace('_', '-')
|
|
113
112
|
|
|
114
113
|
|
|
115
|
-
def df_to_json0(df: pd.DataFrame, dimensions: list[str] = None) -> dict[str, Any]:
|
|
116
|
-
"""
|
|
117
|
-
Convert a pandas DataFrame to the same JSON format that the dataset result API of Squirrels outputs.
|
|
118
|
-
|
|
119
|
-
Parameters:
|
|
120
|
-
df: The dataframe to convert into JSON
|
|
121
|
-
dimensions: The list of declared dimensions. If None, all non-numeric columns are assumed as dimensions
|
|
122
|
-
|
|
123
|
-
Returns:
|
|
124
|
-
The JSON response of a Squirrels dataset result API
|
|
125
|
-
"""
|
|
126
|
-
in_df_json = json.loads(df.to_json(orient='table', index=False))
|
|
127
|
-
out_fields = []
|
|
128
|
-
non_numeric_fields = []
|
|
129
|
-
for in_column in in_df_json["schema"]["fields"]:
|
|
130
|
-
col_name: str = in_column["name"]
|
|
131
|
-
out_column = {"name": col_name, "type": in_column["type"]}
|
|
132
|
-
out_fields.append(out_column)
|
|
133
|
-
|
|
134
|
-
if not pd_types.is_numeric_dtype(df[col_name].dtype):
|
|
135
|
-
non_numeric_fields.append(col_name)
|
|
136
|
-
|
|
137
|
-
out_dimensions = non_numeric_fields if dimensions is None else dimensions
|
|
138
|
-
out_schema = {"fields": out_fields, "dimensions": out_dimensions}
|
|
139
|
-
return {"schema": out_schema, "data": in_df_json["data"]}
|
|
140
|
-
|
|
141
|
-
|
|
142
114
|
def load_json_or_comma_delimited_str_as_list(input_str: Union[str, Sequence]) -> Sequence[str]:
|
|
143
115
|
"""
|
|
144
116
|
Given a string, load it as a list either by json string or comma delimited value
|
|
@@ -183,6 +155,42 @@ def process_if_not_none(input_val: Optional[X], processor: Callable[[X], Y]) ->
|
|
|
183
155
|
return processor(input_val)
|
|
184
156
|
|
|
185
157
|
|
|
186
|
-
def use_duckdb():
|
|
158
|
+
def use_duckdb() -> bool:
|
|
159
|
+
"""
|
|
160
|
+
Determines whether to use DuckDB instead of SQLite for the embedded database
|
|
161
|
+
|
|
162
|
+
Returns:
|
|
163
|
+
A boolean
|
|
164
|
+
"""
|
|
187
165
|
from ._manifest import ManifestIO
|
|
188
166
|
return (ManifestIO.obj.settings.get(c.IN_MEMORY_DB_SETTING, c.SQLITE) == c.DUCKDB)
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def run_sql_on_dataframes(sql_query: str, dataframes: dict[str, pd.DataFrame]) -> pd.DataFrame:
|
|
170
|
+
"""
|
|
171
|
+
Runs a SQL query against a collection of dataframes
|
|
172
|
+
|
|
173
|
+
Parameters:
|
|
174
|
+
sql_query: The SQL query to run
|
|
175
|
+
dataframes: A dictionary of table names to their pandas Dataframe
|
|
176
|
+
|
|
177
|
+
Returns:
|
|
178
|
+
The result as a pandas Dataframe from running the query
|
|
179
|
+
"""
|
|
180
|
+
do_use_duckdb = use_duckdb()
|
|
181
|
+
if do_use_duckdb:
|
|
182
|
+
import duckdb
|
|
183
|
+
conn = duckdb.connect()
|
|
184
|
+
else:
|
|
185
|
+
conn = sqlite3.connect(":memory:")
|
|
186
|
+
|
|
187
|
+
try:
|
|
188
|
+
for name, df in dataframes.items():
|
|
189
|
+
if do_use_duckdb:
|
|
190
|
+
conn.execute(f"CREATE TABLE {name} AS FROM df")
|
|
191
|
+
else:
|
|
192
|
+
df.to_sql(name, conn, index=False)
|
|
193
|
+
|
|
194
|
+
return conn.execute(sql_query).df() if do_use_duckdb else pd.read_sql(sql_query, conn)
|
|
195
|
+
finally:
|
|
196
|
+
conn.close()
|
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
from typing import Callable, Any
|
|
1
|
+
from typing import Union, Callable, Optional, Any
|
|
2
2
|
from dataclasses import dataclass
|
|
3
3
|
from sqlalchemy import Engine
|
|
4
|
-
import pandas as pd
|
|
4
|
+
import pandas as pd
|
|
5
5
|
|
|
6
6
|
from .init_time_args import ConnectionsArgs, ParametersArgs
|
|
7
7
|
from ..user_base import User
|
|
8
|
-
from ..parameters import Parameter
|
|
8
|
+
from ..parameters import Parameter, _TextValue
|
|
9
9
|
from .._connection_set import ConnectionSetIO
|
|
10
10
|
from .. import _utils as u
|
|
11
11
|
|
|
@@ -22,6 +22,32 @@ class ContextArgs(ParametersArgs):
|
|
|
22
22
|
user: User
|
|
23
23
|
prms: dict[str, Parameter]
|
|
24
24
|
traits: dict[str, Any]
|
|
25
|
+
_placeholders: dict[str, Any]
|
|
26
|
+
|
|
27
|
+
def set_placeholder(self, placeholder: str, value: Union[_TextValue, Any]) -> None:
|
|
28
|
+
"""
|
|
29
|
+
Method to set a placeholder value.
|
|
30
|
+
|
|
31
|
+
Parameters:
|
|
32
|
+
placeholder: A string for the name of the placeholder
|
|
33
|
+
value: The value of the placeholder. Can be of any type
|
|
34
|
+
"""
|
|
35
|
+
if isinstance(value, _TextValue):
|
|
36
|
+
value = value._value_do_not_touch
|
|
37
|
+
self._placeholders[placeholder] = value
|
|
38
|
+
|
|
39
|
+
def param_exists(self, param_name: str) -> bool:
|
|
40
|
+
"""
|
|
41
|
+
Method to check whether a given parameter exists and is enabled (i.e., not hidden based on other parameter selections) for the current
|
|
42
|
+
dataset at runtime.
|
|
43
|
+
|
|
44
|
+
Parameters:
|
|
45
|
+
param_name: A string for the name of the parameter
|
|
46
|
+
|
|
47
|
+
Returns:
|
|
48
|
+
A boolean for whether the parameter exists
|
|
49
|
+
"""
|
|
50
|
+
return (param_name in self.prms and self.prms[param_name].is_enabled())
|
|
25
51
|
|
|
26
52
|
|
|
27
53
|
@dataclass
|
|
@@ -32,12 +58,43 @@ class ModelDepsArgs(ContextArgs):
|
|
|
32
58
|
@dataclass
|
|
33
59
|
class ModelArgs(ModelDepsArgs):
|
|
34
60
|
connection_name: str
|
|
35
|
-
|
|
61
|
+
_connections: dict[str, Engine]
|
|
62
|
+
_dependencies: set[str]
|
|
36
63
|
_ref: Callable[[str], pd.DataFrame]
|
|
37
|
-
dependencies: set[str]
|
|
38
64
|
|
|
39
|
-
|
|
40
|
-
|
|
65
|
+
@property
|
|
66
|
+
def connections(self) -> dict[str, Engine]:
|
|
67
|
+
return self._connections.copy()
|
|
68
|
+
|
|
69
|
+
@property
|
|
70
|
+
def dependencies(self) -> set[str]:
|
|
71
|
+
return self._dependencies.copy()
|
|
72
|
+
|
|
73
|
+
def is_placeholder(self, placeholder: str) -> bool:
|
|
74
|
+
"""
|
|
75
|
+
Checks whether a name is a valid placeholder
|
|
76
|
+
|
|
77
|
+
Parameters:
|
|
78
|
+
placeholder: A string for the name of the placeholder
|
|
79
|
+
|
|
80
|
+
Returns:
|
|
81
|
+
A boolean for whether a placeholder with the given name exists
|
|
82
|
+
"""
|
|
83
|
+
return placeholder in self._placeholders
|
|
84
|
+
|
|
85
|
+
def get_placeholder_value(self, placeholder: str) -> Optional[Any]:
|
|
86
|
+
"""
|
|
87
|
+
Gets the value of a placeholder.
|
|
88
|
+
|
|
89
|
+
USE WITH CAUTION. Do not use the return value directly in a SQL query since that could be prone to SQL injection
|
|
90
|
+
|
|
91
|
+
Parameters:
|
|
92
|
+
placeholder: A string for the name of the placeholder
|
|
93
|
+
|
|
94
|
+
Returns:
|
|
95
|
+
The value of the placeholder (can be of any type), or None if the placeholder does not exist
|
|
96
|
+
"""
|
|
97
|
+
return self._placeholders.get(placeholder)
|
|
41
98
|
|
|
42
99
|
def ref(self, model: str) -> pd.DataFrame:
|
|
43
100
|
"""
|
|
@@ -52,49 +109,34 @@ class ModelArgs(ModelDepsArgs):
|
|
|
52
109
|
Returns:
|
|
53
110
|
A pandas DataFrame
|
|
54
111
|
"""
|
|
112
|
+
return self._ref(model)
|
|
55
113
|
|
|
56
|
-
def run_external_sql(self,
|
|
114
|
+
def run_external_sql(self, sql_query: str, *, connection_name: str = None, **kwargs) -> pd.DataFrame:
|
|
57
115
|
"""
|
|
58
|
-
Runs a SQL query against an external database, with option to specify the connection name
|
|
116
|
+
Runs a SQL query against an external database, with the option to specify the connection name. Placeholder values are provided automatically.
|
|
59
117
|
|
|
60
118
|
Parameters:
|
|
61
|
-
|
|
119
|
+
sql_query: The SQL query. Can be parameterized with placeholders
|
|
62
120
|
connection_name: The connection name for the database. If None, uses the one configured for the model
|
|
63
121
|
|
|
64
122
|
Returns:
|
|
65
123
|
The query result as a pandas DataFrame
|
|
66
124
|
"""
|
|
67
125
|
connection_name = self.connection_name if connection_name is None else connection_name
|
|
68
|
-
return ConnectionSetIO.obj.run_sql_query_from_conn_name(
|
|
126
|
+
return ConnectionSetIO.obj.run_sql_query_from_conn_name(sql_query, connection_name, self._placeholders)
|
|
69
127
|
|
|
70
|
-
def run_sql_on_dataframes(self,
|
|
128
|
+
def run_sql_on_dataframes(self, sql_query: str, *, dataframes: dict[str, pd.DataFrame] = None, **kwargs) -> pd.DataFrame:
|
|
71
129
|
"""
|
|
72
|
-
Uses a dictionary of dataframes to execute a SQL query in an in-memory sqlite
|
|
130
|
+
Uses a dictionary of dataframes to execute a SQL query in an embedded in-memory database (sqlite or duckdb based on setting)
|
|
73
131
|
|
|
74
132
|
Parameters:
|
|
75
|
-
|
|
76
|
-
dataframes: A dictionary of table names to their pandas Dataframe
|
|
133
|
+
sql_query: The SQL query to run
|
|
134
|
+
dataframes: A dictionary of table names to their pandas Dataframe. If None, uses results of dependent models
|
|
77
135
|
|
|
78
136
|
Returns:
|
|
79
137
|
The result as a pandas Dataframe from running the query
|
|
80
138
|
"""
|
|
81
139
|
if dataframes is None:
|
|
82
|
-
dataframes = {x: self.ref(x) for x in self.
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
if use_duckdb:
|
|
86
|
-
import duckdb
|
|
87
|
-
conn = duckdb.connect()
|
|
88
|
-
else:
|
|
89
|
-
conn = sqlite3.connect(":memory:")
|
|
90
|
-
|
|
91
|
-
try:
|
|
92
|
-
for name, df in dataframes.items():
|
|
93
|
-
if use_duckdb:
|
|
94
|
-
conn.execute(f"CREATE TABLE {name} AS FROM df")
|
|
95
|
-
else:
|
|
96
|
-
df.to_sql(name, conn, index=False)
|
|
97
|
-
|
|
98
|
-
return conn.execute(query).df() if use_duckdb else pd.read_sql(query, conn)
|
|
99
|
-
finally:
|
|
100
|
-
conn.close()
|
|
140
|
+
dataframes = {x: self.ref(x) for x in self._dependencies}
|
|
141
|
+
|
|
142
|
+
return u.run_sql_on_dataframes(sql_query, dataframes)
|