squirrels 0.4.1__py3-none-any.whl → 0.5.0rc0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of squirrels might be problematic. Click here for more details.
- squirrels/__init__.py +10 -6
- squirrels/_api_response_models.py +93 -44
- squirrels/_api_server.py +571 -219
- squirrels/_auth.py +451 -0
- squirrels/_command_line.py +61 -20
- squirrels/_connection_set.py +38 -25
- squirrels/_constants.py +44 -34
- squirrels/_dashboards_io.py +34 -16
- squirrels/_exceptions.py +57 -0
- squirrels/_initializer.py +117 -44
- squirrels/_manifest.py +124 -62
- squirrels/_model_builder.py +111 -0
- squirrels/_model_configs.py +74 -0
- squirrels/_model_queries.py +52 -0
- squirrels/_models.py +860 -354
- squirrels/_package_loader.py +8 -4
- squirrels/_parameter_configs.py +45 -65
- squirrels/_parameter_sets.py +15 -13
- squirrels/_project.py +561 -0
- squirrels/_py_module.py +4 -3
- squirrels/_seeds.py +35 -16
- squirrels/_sources.py +106 -0
- squirrels/_utils.py +166 -63
- squirrels/_version.py +1 -1
- squirrels/arguments/init_time_args.py +78 -15
- squirrels/arguments/run_time_args.py +62 -101
- squirrels/dashboards.py +4 -4
- squirrels/data_sources.py +94 -162
- squirrels/dataset_result.py +86 -0
- squirrels/dateutils.py +4 -4
- squirrels/package_data/base_project/.env +30 -0
- squirrels/package_data/base_project/.env.example +30 -0
- squirrels/package_data/base_project/.gitignore +3 -2
- squirrels/package_data/base_project/assets/expenses.db +0 -0
- squirrels/package_data/base_project/connections.yml +11 -3
- squirrels/package_data/base_project/dashboards/dashboard_example.py +15 -13
- squirrels/package_data/base_project/dashboards/dashboard_example.yml +22 -0
- squirrels/package_data/base_project/docker/.dockerignore +5 -2
- squirrels/package_data/base_project/docker/Dockerfile +3 -3
- squirrels/package_data/base_project/docker/compose.yml +1 -1
- squirrels/package_data/base_project/duckdb_init.sql +9 -0
- squirrels/package_data/base_project/macros/macros_example.sql +15 -0
- squirrels/package_data/base_project/models/builds/build_example.py +26 -0
- squirrels/package_data/base_project/models/builds/build_example.sql +16 -0
- squirrels/package_data/base_project/models/builds/build_example.yml +55 -0
- squirrels/package_data/base_project/models/dbviews/dbview_example.sql +12 -22
- squirrels/package_data/base_project/models/dbviews/dbview_example.yml +26 -0
- squirrels/package_data/base_project/models/federates/federate_example.py +38 -15
- squirrels/package_data/base_project/models/federates/federate_example.sql +16 -2
- squirrels/package_data/base_project/models/federates/federate_example.yml +65 -0
- squirrels/package_data/base_project/models/sources.yml +39 -0
- squirrels/package_data/base_project/parameters.yml +36 -21
- squirrels/package_data/base_project/pyconfigs/connections.py +6 -11
- squirrels/package_data/base_project/pyconfigs/context.py +20 -33
- squirrels/package_data/base_project/pyconfigs/parameters.py +19 -21
- squirrels/package_data/base_project/pyconfigs/user.py +23 -0
- squirrels/package_data/base_project/seeds/seed_categories.yml +15 -0
- squirrels/package_data/base_project/seeds/seed_subcategories.csv +15 -15
- squirrels/package_data/base_project/seeds/seed_subcategories.yml +21 -0
- squirrels/package_data/base_project/squirrels.yml.j2 +17 -40
- squirrels/parameters.py +20 -20
- {squirrels-0.4.1.dist-info → squirrels-0.5.0rc0.dist-info}/METADATA +31 -32
- squirrels-0.5.0rc0.dist-info/RECORD +70 -0
- {squirrels-0.4.1.dist-info → squirrels-0.5.0rc0.dist-info}/WHEEL +1 -1
- squirrels-0.5.0rc0.dist-info/entry_points.txt +3 -0
- {squirrels-0.4.1.dist-info → squirrels-0.5.0rc0.dist-info/licenses}/LICENSE +1 -1
- squirrels/_authenticator.py +0 -85
- squirrels/_environcfg.py +0 -84
- squirrels/package_data/assets/favicon.ico +0 -0
- squirrels/package_data/assets/index.css +0 -1
- squirrels/package_data/assets/index.js +0 -58
- squirrels/package_data/base_project/dashboards.yml +0 -10
- squirrels/package_data/base_project/env.yml +0 -29
- squirrels/package_data/base_project/models/dbviews/dbview_example.py +0 -47
- squirrels/package_data/base_project/pyconfigs/auth.py +0 -45
- squirrels/package_data/templates/index.html +0 -18
- squirrels/project.py +0 -378
- squirrels/user_base.py +0 -55
- squirrels-0.4.1.dist-info/RECORD +0 -60
- squirrels-0.4.1.dist-info/entry_points.txt +0 -4
squirrels/data_sources.py
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
from __future__ import annotations as _a
|
|
2
|
-
import
|
|
2
|
+
import polars as _pl, typing as _t, dataclasses as _d, abc as _abc
|
|
3
3
|
|
|
4
|
-
from . import _parameter_configs as _pc, parameter_options as _po
|
|
4
|
+
from . import _parameter_configs as _pc, parameter_options as _po
|
|
5
|
+
from ._exceptions import ConfigurationError
|
|
5
6
|
|
|
6
7
|
|
|
7
8
|
@_d.dataclass
|
|
@@ -14,22 +15,22 @@ class DataSource(metaclass=_abc.ABCMeta):
|
|
|
14
15
|
_is_from_seeds: bool
|
|
15
16
|
_user_group_col: str | None
|
|
16
17
|
_parent_id_col: str | None
|
|
17
|
-
|
|
18
|
+
_connection: str | None
|
|
18
19
|
|
|
19
20
|
@_abc.abstractmethod
|
|
20
21
|
def __init__(
|
|
21
22
|
self, table_or_query: str, *, id_col: str | None = None, from_seeds: bool = False, user_group_col: str | None = None,
|
|
22
|
-
parent_id_col: str | None = None,
|
|
23
|
+
parent_id_col: str | None = None, connection: str | None = None, **kwargs
|
|
23
24
|
) -> None:
|
|
24
25
|
self._table_or_query = table_or_query
|
|
25
26
|
self._id_col = id_col
|
|
26
27
|
self._is_from_seeds = from_seeds
|
|
27
28
|
self._user_group_col = user_group_col
|
|
28
29
|
self._parent_id_col = parent_id_col
|
|
29
|
-
self.
|
|
30
|
+
self._connection = connection
|
|
30
31
|
|
|
31
32
|
def _get_connection_name(self, default_conn_name: str) -> str:
|
|
32
|
-
return self.
|
|
33
|
+
return self._connection if self._connection is not None else default_conn_name
|
|
33
34
|
|
|
34
35
|
def _get_query(self) -> str:
|
|
35
36
|
"""
|
|
@@ -45,7 +46,7 @@ class DataSource(metaclass=_abc.ABCMeta):
|
|
|
45
46
|
return query
|
|
46
47
|
|
|
47
48
|
@_abc.abstractmethod
|
|
48
|
-
def _convert(self, ds_param: _pc.DataSourceParameterConfig, df:
|
|
49
|
+
def _convert(self, ds_param: _pc.DataSourceParameterConfig, df: _pl.DataFrame) -> _pc.ParameterConfig:
|
|
49
50
|
"""
|
|
50
51
|
An abstract method for converting itself into a parameter
|
|
51
52
|
"""
|
|
@@ -55,30 +56,32 @@ class DataSource(metaclass=_abc.ABCMeta):
|
|
|
55
56
|
if ds_param.parameter_type != target_parameter_type:
|
|
56
57
|
parameter_type_name = ds_param.parameter_type.__name__
|
|
57
58
|
datasource_type_name = self.__class__.__name__
|
|
58
|
-
raise
|
|
59
|
+
raise ConfigurationError(f'Invalid widget type "{parameter_type_name}" for {datasource_type_name}')
|
|
59
60
|
|
|
60
|
-
def _get_aggregated_df(self, df:
|
|
61
|
-
|
|
61
|
+
def _get_aggregated_df(self, df: _pl.DataFrame, columns_to_include: _t.Iterable[str]) -> _pl.DataFrame:
|
|
62
|
+
if self._id_col is None:
|
|
63
|
+
return df
|
|
64
|
+
|
|
65
|
+
agg_rules = []
|
|
62
66
|
for column in columns_to_include:
|
|
63
67
|
if column is not None:
|
|
64
|
-
agg_rules
|
|
68
|
+
agg_rules.append(_pl.first(column))
|
|
65
69
|
if self._user_group_col is not None:
|
|
66
|
-
agg_rules
|
|
70
|
+
agg_rules.append(_pl.col(self._user_group_col))
|
|
67
71
|
if self._parent_id_col is not None:
|
|
68
|
-
agg_rules
|
|
72
|
+
agg_rules.append(_pl.col(self._parent_id_col))
|
|
69
73
|
|
|
70
|
-
groupby_dim = self._id_col if self._id_col is not None else df.index
|
|
71
74
|
try:
|
|
72
|
-
df_agg = df.
|
|
73
|
-
except
|
|
74
|
-
raise
|
|
75
|
+
df_agg = df.group_by(self._id_col).agg(agg_rules).sort(by=self._id_col)
|
|
76
|
+
except _pl.exceptions.ColumnNotFoundError as e:
|
|
77
|
+
raise ConfigurationError(e)
|
|
75
78
|
|
|
76
79
|
return df_agg
|
|
77
80
|
|
|
78
|
-
def _get_key_from_record(self, key: str | None, record: dict[
|
|
81
|
+
def _get_key_from_record(self, key: str | None, record: dict[_t.Hashable, _t.Any], default: _t.Any) -> _t.Any:
|
|
79
82
|
return record[key] if key is not None else default
|
|
80
83
|
|
|
81
|
-
def _get_key_from_record_as_list(self, key: str | None, record: dict[
|
|
84
|
+
def _get_key_from_record_as_list(self, key: str | None, record: dict[_t.Hashable, _t.Any]) -> _t.Iterable[str]:
|
|
82
85
|
value = self._get_key_from_record(key, record, list())
|
|
83
86
|
return [str(x) for x in value]
|
|
84
87
|
|
|
@@ -97,43 +100,45 @@ class _SelectionDataSource(DataSource):
|
|
|
97
100
|
def __init__(
|
|
98
101
|
self, table_or_query: str, id_col: str, options_col: str, *, order_by_col: str | None = None,
|
|
99
102
|
is_default_col: str | None = None, custom_cols: dict[str, str] = {}, from_seeds: bool = False,
|
|
100
|
-
user_group_col: str | None = None, parent_id_col: str | None = None,
|
|
103
|
+
user_group_col: str | None = None, parent_id_col: str | None = None, connection: str | None = None,
|
|
101
104
|
**kwargs
|
|
102
105
|
) -> None:
|
|
103
106
|
super().__init__(
|
|
104
107
|
table_or_query, id_col=id_col, from_seeds=from_seeds, user_group_col=user_group_col, parent_id_col=parent_id_col,
|
|
105
|
-
|
|
108
|
+
connection=connection
|
|
106
109
|
)
|
|
107
110
|
self._options_col = options_col
|
|
108
111
|
self._order_by_col = order_by_col
|
|
109
112
|
self._is_default_col = is_default_col
|
|
110
113
|
self._custom_cols = custom_cols
|
|
111
114
|
|
|
112
|
-
def _get_all_options(self, df:
|
|
115
|
+
def _get_all_options(self, df: _pl.DataFrame) -> _t.Sequence[_po.SelectParameterOption]:
|
|
113
116
|
columns = [self._options_col, self._order_by_col, self._is_default_col, *self._custom_cols.values()]
|
|
114
117
|
df_agg = self._get_aggregated_df(df, columns)
|
|
115
118
|
|
|
116
119
|
if self._order_by_col is None:
|
|
117
|
-
df_agg.
|
|
120
|
+
df_agg = df_agg.sort(by=self._id_col)
|
|
118
121
|
else:
|
|
119
|
-
df_agg.
|
|
122
|
+
df_agg = df_agg.sort(by=self._order_by_col)
|
|
120
123
|
|
|
121
|
-
def get_is_default(record: dict[
|
|
124
|
+
def get_is_default(record: dict[_t.Hashable, _t.Any]) -> bool:
|
|
122
125
|
return int(record[self._is_default_col]) == 1 if self._is_default_col is not None else False
|
|
123
126
|
|
|
124
|
-
def get_custom_fields(record: dict[
|
|
127
|
+
def get_custom_fields(record: dict[_t.Hashable, _t.Any]) -> dict[str, _t.Any]:
|
|
125
128
|
result = {}
|
|
126
129
|
for key, val in self._custom_cols.items():
|
|
127
130
|
result[key] = record[val]
|
|
128
131
|
return result
|
|
129
132
|
|
|
130
|
-
records = df_agg.to_dict("
|
|
133
|
+
records = df_agg.to_pandas().to_dict("records")
|
|
131
134
|
return tuple(
|
|
132
|
-
_po.SelectParameterOption(
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
135
|
+
_po.SelectParameterOption(
|
|
136
|
+
str(record[self._id_col]), str(record[self._options_col]),
|
|
137
|
+
is_default=get_is_default(record), custom_fields=get_custom_fields(record),
|
|
138
|
+
user_groups=self._get_key_from_record_as_list(self._user_group_col, record),
|
|
139
|
+
parent_option_ids=self._get_key_from_record_as_list(self._parent_id_col, record)
|
|
140
|
+
)
|
|
141
|
+
for record in records
|
|
137
142
|
)
|
|
138
143
|
|
|
139
144
|
|
|
@@ -146,7 +151,7 @@ class SelectDataSource(_SelectionDataSource):
|
|
|
146
151
|
def __init__(
|
|
147
152
|
self, table_or_query: str, id_col: str, options_col: str, *, order_by_col: str | None = None,
|
|
148
153
|
is_default_col: str | None = None, custom_cols: dict[str, str] = {}, from_seeds: bool = False,
|
|
149
|
-
user_group_col: str | None = None, parent_id_col: str | None = None,
|
|
154
|
+
user_group_col: str | None = None, parent_id_col: str | None = None, connection: str | None = None,
|
|
150
155
|
**kwargs
|
|
151
156
|
) -> None:
|
|
152
157
|
"""
|
|
@@ -162,14 +167,14 @@ class SelectDataSource(_SelectionDataSource):
|
|
|
162
167
|
from_seeds: Boolean for whether this datasource is created from seeds
|
|
163
168
|
user_group_col: The column name of the user group that the user is in for this option to be valid
|
|
164
169
|
parent_id_col: The column name of the parent option id that must be selected for this option to be valid
|
|
165
|
-
|
|
170
|
+
connection: Name of the connection to use defined in connections.py
|
|
166
171
|
"""
|
|
167
172
|
super().__init__(
|
|
168
173
|
table_or_query, id_col, options_col, order_by_col=order_by_col, is_default_col=is_default_col, custom_cols=custom_cols,
|
|
169
|
-
from_seeds=from_seeds, user_group_col=user_group_col, parent_id_col=parent_id_col,
|
|
174
|
+
from_seeds=from_seeds, user_group_col=user_group_col, parent_id_col=parent_id_col, connection=connection
|
|
170
175
|
)
|
|
171
176
|
|
|
172
|
-
def _convert(self, ds_param: _pc.DataSourceParameterConfig, df:
|
|
177
|
+
def _convert(self, ds_param: _pc.DataSourceParameterConfig, df: _pl.DataFrame) -> _pc.SelectionParameterConfig:
|
|
173
178
|
"""
|
|
174
179
|
Method to convert the associated DataSourceParameter into a SingleSelectParameterConfig or MultiSelectParameterConfig
|
|
175
180
|
|
|
@@ -192,86 +197,7 @@ class SelectDataSource(_SelectionDataSource):
|
|
|
192
197
|
user_attribute=ds_param.user_attribute, parent_name=ds_param.parent_name, **ds_param.extra_args
|
|
193
198
|
)
|
|
194
199
|
else:
|
|
195
|
-
raise
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
@_d.dataclass
|
|
199
|
-
class SingleSelectDataSource(_SelectionDataSource):
|
|
200
|
-
"""
|
|
201
|
-
DEPRECATED. Use "SelectDataSource" instead.
|
|
202
|
-
"""
|
|
203
|
-
|
|
204
|
-
def __init__(
|
|
205
|
-
self, table_or_query: str, id_col: str, options_col: str, *, order_by_col: str | None = None,
|
|
206
|
-
is_default_col: str | None = None, custom_cols: dict[str, str] = {}, user_group_col: str | None = None,
|
|
207
|
-
parent_id_col: str | None = None, connection_name: str | None = None, **kwargs
|
|
208
|
-
) -> None:
|
|
209
|
-
"""
|
|
210
|
-
DEPRECATED. Use "SelectDataSource" instead.
|
|
211
|
-
"""
|
|
212
|
-
super().__init__(table_or_query, id_col, options_col, order_by_col=order_by_col, is_default_col=is_default_col,
|
|
213
|
-
custom_cols=custom_cols, user_group_col=user_group_col, parent_id_col=parent_id_col,
|
|
214
|
-
connection_name=connection_name)
|
|
215
|
-
|
|
216
|
-
def _convert(self, ds_param: _pc.DataSourceParameterConfig, df: _pd.DataFrame) -> _pc.SingleSelectParameterConfig:
|
|
217
|
-
"""
|
|
218
|
-
Method to convert the associated DataSourceParameter into a SingleSelectParameterConfig
|
|
219
|
-
|
|
220
|
-
Arguments:
|
|
221
|
-
ds_param: The parameter to convert
|
|
222
|
-
df: The dataframe containing the parameter options data
|
|
223
|
-
|
|
224
|
-
Returns:
|
|
225
|
-
The converted parameter
|
|
226
|
-
"""
|
|
227
|
-
self._validate_parameter_type(ds_param, _pc.SingleSelectParameterConfig)
|
|
228
|
-
all_options = self._get_all_options(df)
|
|
229
|
-
return _pc.SingleSelectParameterConfig(ds_param.name, ds_param.label, all_options, description=ds_param.description,
|
|
230
|
-
user_attribute=ds_param.user_attribute, parent_name=ds_param.parent_name)
|
|
231
|
-
|
|
232
|
-
@_d.dataclass
|
|
233
|
-
class MultiSelectDataSource(_SelectionDataSource):
|
|
234
|
-
"""
|
|
235
|
-
DEPRECATED. Use "SelectDataSource" instead.
|
|
236
|
-
"""
|
|
237
|
-
_show_select_all: bool
|
|
238
|
-
_order_matters: bool
|
|
239
|
-
_none_is_all: bool
|
|
240
|
-
|
|
241
|
-
def __init__(
|
|
242
|
-
self, table_or_query: str, id_col: str, options_col: str, *, order_by_col: str | None = None,
|
|
243
|
-
is_default_col: str | None = None, custom_cols: dict[str, str] = {}, show_select_all: bool = True,
|
|
244
|
-
order_matters: bool = False, none_is_all: bool = True, user_group_col: str | None = None,
|
|
245
|
-
parent_id_col: str | None = None, connection_name: str | None = None, **kwargs
|
|
246
|
-
) -> None:
|
|
247
|
-
"""
|
|
248
|
-
DEPRECATED. Use "SelectDataSource" instead.
|
|
249
|
-
"""
|
|
250
|
-
super().__init__(table_or_query, id_col, options_col, order_by_col=order_by_col, is_default_col=is_default_col,
|
|
251
|
-
custom_cols=custom_cols, user_group_col=user_group_col, parent_id_col=parent_id_col,
|
|
252
|
-
connection_name=connection_name)
|
|
253
|
-
self._show_select_all = show_select_all
|
|
254
|
-
self._order_matters = order_matters
|
|
255
|
-
self._none_is_all = none_is_all
|
|
256
|
-
|
|
257
|
-
def _convert(self, ds_param: _pc.DataSourceParameterConfig, df: _pd.DataFrame) -> _pc.MultiSelectParameterConfig:
|
|
258
|
-
"""
|
|
259
|
-
Method to convert the associated DataSourceParameter into a MultiSelectParameterConfig
|
|
260
|
-
|
|
261
|
-
Arguments:
|
|
262
|
-
ds_param: The parameter to convert
|
|
263
|
-
df: The dataframe containing the parameter options data
|
|
264
|
-
|
|
265
|
-
Returns:
|
|
266
|
-
The converted parameter
|
|
267
|
-
"""
|
|
268
|
-
self._validate_parameter_type(ds_param, _pc.MultiSelectParameterConfig)
|
|
269
|
-
all_options = self._get_all_options(df)
|
|
270
|
-
return _pc.MultiSelectParameterConfig(
|
|
271
|
-
ds_param.name, ds_param.label, all_options, show_select_all=self._show_select_all,
|
|
272
|
-
order_matters=self._order_matters, none_is_all=self._none_is_all, description=ds_param.description,
|
|
273
|
-
user_attribute=ds_param.user_attribute, parent_name=ds_param.parent_name
|
|
274
|
-
)
|
|
200
|
+
raise ConfigurationError(f'Invalid widget type "{ds_param.parameter_type}" for SelectDataSource')
|
|
275
201
|
|
|
276
202
|
|
|
277
203
|
@_d.dataclass
|
|
@@ -286,7 +212,7 @@ class DateDataSource(DataSource):
|
|
|
286
212
|
self, table_or_query: str, default_date_col: str, *, min_date_col: str | None = None,
|
|
287
213
|
max_date_col: str | None = None, date_format: str = '%Y-%m-%d', id_col: str | None = None,
|
|
288
214
|
from_seeds: bool = False, user_group_col: str | None = None, parent_id_col: str | None = None,
|
|
289
|
-
|
|
215
|
+
connection: str | None = None, **kwargs
|
|
290
216
|
) -> None:
|
|
291
217
|
"""
|
|
292
218
|
Constructor for DateDataSource
|
|
@@ -299,18 +225,18 @@ class DateDataSource(DataSource):
|
|
|
299
225
|
from_seeds: Boolean for whether this datasource is created from seeds
|
|
300
226
|
user_group_col: The column name of the user group that the user is in for this option to be valid
|
|
301
227
|
parent_id_col: The column name of the parent option id that the default date belongs to
|
|
302
|
-
|
|
228
|
+
connection: Name of the connection to use defined in connections.py
|
|
303
229
|
"""
|
|
304
230
|
super().__init__(
|
|
305
231
|
table_or_query, id_col=id_col, from_seeds=from_seeds, user_group_col=user_group_col, parent_id_col=parent_id_col,
|
|
306
|
-
|
|
232
|
+
connection=connection
|
|
307
233
|
)
|
|
308
234
|
self._default_date_col = default_date_col
|
|
309
235
|
self._min_date_col = min_date_col
|
|
310
236
|
self._max_date_col = max_date_col
|
|
311
237
|
self._date_format = date_format
|
|
312
238
|
|
|
313
|
-
def _convert(self, ds_param: _pc.DataSourceParameterConfig, df:
|
|
239
|
+
def _convert(self, ds_param: _pc.DataSourceParameterConfig, df: _pl.DataFrame) -> _pc.DateParameterConfig:
|
|
314
240
|
"""
|
|
315
241
|
Method to convert the associated DataSourceParameter into a DateParameterConfig
|
|
316
242
|
|
|
@@ -326,7 +252,7 @@ class DateDataSource(DataSource):
|
|
|
326
252
|
columns = [self._default_date_col, self._min_date_col, self._max_date_col]
|
|
327
253
|
df_agg = self._get_aggregated_df(df, columns)
|
|
328
254
|
|
|
329
|
-
records = df_agg.to_dict("
|
|
255
|
+
records = df_agg.to_pandas().to_dict("records")
|
|
330
256
|
options = tuple(
|
|
331
257
|
_po.DateParameterOption(
|
|
332
258
|
str(record[self._default_date_col]), date_format=self._date_format,
|
|
@@ -335,7 +261,7 @@ class DateDataSource(DataSource):
|
|
|
335
261
|
user_groups=self._get_key_from_record_as_list(self._user_group_col, record),
|
|
336
262
|
parent_option_ids=self._get_key_from_record_as_list(self._parent_id_col, record)
|
|
337
263
|
)
|
|
338
|
-
for
|
|
264
|
+
for record in records
|
|
339
265
|
)
|
|
340
266
|
return _pc.DateParameterConfig(
|
|
341
267
|
ds_param.name, ds_param.label, options, description=ds_param.description, user_attribute=ds_param.user_attribute,
|
|
@@ -355,7 +281,7 @@ class DateRangeDataSource(DataSource):
|
|
|
355
281
|
def __init__(
|
|
356
282
|
self, table_or_query: str, default_start_date_col: str, default_end_date_col: str, *, date_format: str = '%Y-%m-%d',
|
|
357
283
|
min_date_col: str | None = None, max_date_col: str | None = None, id_col: str | None = None, from_seeds: bool = False,
|
|
358
|
-
user_group_col: str | None = None, parent_id_col: str | None = None,
|
|
284
|
+
user_group_col: str | None = None, parent_id_col: str | None = None, connection: str | None = None, **kwargs
|
|
359
285
|
) -> None:
|
|
360
286
|
"""
|
|
361
287
|
Constructor for DateRangeDataSource
|
|
@@ -369,11 +295,11 @@ class DateRangeDataSource(DataSource):
|
|
|
369
295
|
from_seeds: Boolean for whether this datasource is created from seeds
|
|
370
296
|
user_group_col: The column name of the user group that the user is in for this option to be valid
|
|
371
297
|
parent_id_col: The column name of the parent option id that the default date belongs to
|
|
372
|
-
|
|
298
|
+
connection: Name of the connection to use defined in connections.py
|
|
373
299
|
"""
|
|
374
300
|
super().__init__(
|
|
375
301
|
table_or_query, id_col=id_col, from_seeds=from_seeds, user_group_col=user_group_col, parent_id_col=parent_id_col,
|
|
376
|
-
|
|
302
|
+
connection=connection
|
|
377
303
|
)
|
|
378
304
|
self._default_start_date_col = default_start_date_col
|
|
379
305
|
self._default_end_date_col = default_end_date_col
|
|
@@ -381,7 +307,7 @@ class DateRangeDataSource(DataSource):
|
|
|
381
307
|
self._max_date_col = max_date_col
|
|
382
308
|
self._date_format = date_format
|
|
383
309
|
|
|
384
|
-
def _convert(self, ds_param: _pc.DataSourceParameterConfig, df:
|
|
310
|
+
def _convert(self, ds_param: _pc.DataSourceParameterConfig, df: _pl.DataFrame) -> _pc.DateRangeParameterConfig:
|
|
385
311
|
"""
|
|
386
312
|
Method to convert the associated DataSourceParameter into a DateRangeParameterConfig
|
|
387
313
|
|
|
@@ -397,7 +323,7 @@ class DateRangeDataSource(DataSource):
|
|
|
397
323
|
columns = [self._default_start_date_col, self._default_end_date_col, self._min_date_col, self._max_date_col]
|
|
398
324
|
df_agg = self._get_aggregated_df(df, columns)
|
|
399
325
|
|
|
400
|
-
records = df_agg.to_dict("
|
|
326
|
+
records = df_agg.to_pandas().to_dict("records")
|
|
401
327
|
options = tuple(
|
|
402
328
|
_po.DateRangeParameterOption(
|
|
403
329
|
str(record[self._default_start_date_col]), str(record[self._default_end_date_col]),
|
|
@@ -407,7 +333,7 @@ class DateRangeDataSource(DataSource):
|
|
|
407
333
|
user_groups=self._get_key_from_record_as_list(self._user_group_col, record),
|
|
408
334
|
parent_option_ids=self._get_key_from_record_as_list(self._parent_id_col, record)
|
|
409
335
|
)
|
|
410
|
-
for
|
|
336
|
+
for record in records
|
|
411
337
|
)
|
|
412
338
|
return _pc.DateRangeParameterConfig(
|
|
413
339
|
ds_param.name, ds_param.label, options, description=ds_param.description, user_attribute=ds_param.user_attribute,
|
|
@@ -428,11 +354,11 @@ class _NumericDataSource(DataSource):
|
|
|
428
354
|
def __init__(
|
|
429
355
|
self, table_or_query: str, min_value_col: str, max_value_col: str, *, increment_col: str | None = None,
|
|
430
356
|
id_col: str | None = None, from_seeds: bool = False, user_group_col: str | None = None,
|
|
431
|
-
parent_id_col: str | None = None,
|
|
357
|
+
parent_id_col: str | None = None, connection: str | None = None, **kwargs
|
|
432
358
|
) -> None:
|
|
433
359
|
super().__init__(
|
|
434
360
|
table_or_query, id_col=id_col, from_seeds=from_seeds, user_group_col=user_group_col, parent_id_col=parent_id_col,
|
|
435
|
-
|
|
361
|
+
connection=connection
|
|
436
362
|
)
|
|
437
363
|
self._min_value_col = min_value_col
|
|
438
364
|
self._max_value_col = max_value_col
|
|
@@ -449,7 +375,7 @@ class NumberDataSource(_NumericDataSource):
|
|
|
449
375
|
def __init__(
|
|
450
376
|
self, table_or_query: str, min_value_col: str, max_value_col: str, *, increment_col: str | None = None,
|
|
451
377
|
default_value_col: str | None = None, id_col: str | None = None, from_seeds: bool = False,
|
|
452
|
-
user_group_col: str | None = None, parent_id_col: str | None = None,
|
|
378
|
+
user_group_col: str | None = None, parent_id_col: str | None = None, connection: str | None = None, **kwargs
|
|
453
379
|
) -> None:
|
|
454
380
|
"""
|
|
455
381
|
Constructor for NumberDataSource
|
|
@@ -464,15 +390,15 @@ class NumberDataSource(_NumericDataSource):
|
|
|
464
390
|
from_seeds: Boolean for whether this datasource is created from seeds
|
|
465
391
|
user_group_col: The column name of the user group that the user is in for this option to be valid
|
|
466
392
|
parent_id_col: The column name of the parent option id that the default value belongs to
|
|
467
|
-
|
|
393
|
+
connection: Name of the connection to use defined in connections.py
|
|
468
394
|
"""
|
|
469
395
|
super().__init__(
|
|
470
396
|
table_or_query, min_value_col, max_value_col, increment_col=increment_col, id_col=id_col, from_seeds=from_seeds,
|
|
471
|
-
user_group_col=user_group_col, parent_id_col=parent_id_col,
|
|
397
|
+
user_group_col=user_group_col, parent_id_col=parent_id_col, connection=connection
|
|
472
398
|
)
|
|
473
399
|
self._default_value_col = default_value_col
|
|
474
400
|
|
|
475
|
-
def _convert(self, ds_param: _pc.DataSourceParameterConfig, df:
|
|
401
|
+
def _convert(self, ds_param: _pc.DataSourceParameterConfig, df: _pl.DataFrame) -> _pc.NumberParameterConfig:
|
|
476
402
|
"""
|
|
477
403
|
Method to convert the associated DataSourceParameter into a NumberParameterConfig
|
|
478
404
|
|
|
@@ -488,14 +414,16 @@ class NumberDataSource(_NumericDataSource):
|
|
|
488
414
|
columns = [self._min_value_col, self._max_value_col, self._increment_col, self._default_value_col]
|
|
489
415
|
df_agg = self._get_aggregated_df(df, columns)
|
|
490
416
|
|
|
491
|
-
records = df_agg.to_dict("
|
|
417
|
+
records = df_agg.to_pandas().to_dict("records")
|
|
492
418
|
options = tuple(
|
|
493
|
-
_po.NumberParameterOption(
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
419
|
+
_po.NumberParameterOption(
|
|
420
|
+
record[self._min_value_col], record[self._max_value_col],
|
|
421
|
+
increment=self._get_key_from_record(self._increment_col, record, 1),
|
|
422
|
+
default_value=self._get_key_from_record(self._default_value_col, record, None),
|
|
423
|
+
user_groups=self._get_key_from_record_as_list(self._user_group_col, record),
|
|
424
|
+
parent_option_ids=self._get_key_from_record_as_list(self._parent_id_col, record)
|
|
425
|
+
)
|
|
426
|
+
for record in records
|
|
499
427
|
)
|
|
500
428
|
return _pc.NumberParameterConfig(
|
|
501
429
|
ds_param.name, ds_param.label, options, description=ds_param.description, user_attribute=ds_param.user_attribute,
|
|
@@ -515,7 +443,7 @@ class NumberRangeDataSource(_NumericDataSource):
|
|
|
515
443
|
self, table_or_query: str, min_value_col: str, max_value_col: str, *, increment_col: str | None = None,
|
|
516
444
|
default_lower_value_col: str | None = None, default_upper_value_col: str | None = None, id_col: str | None = None,
|
|
517
445
|
from_seeds: bool = False, user_group_col: str | None = None, parent_id_col: str | None = None,
|
|
518
|
-
|
|
446
|
+
connection: str | None = None, **kwargs
|
|
519
447
|
) -> None:
|
|
520
448
|
"""
|
|
521
449
|
Constructor for NumRangeDataSource
|
|
@@ -531,16 +459,16 @@ class NumberRangeDataSource(_NumericDataSource):
|
|
|
531
459
|
from_seeds: Boolean for whether this datasource is created from seeds
|
|
532
460
|
user_group_col: The column name of the user group that the user is in for this option to be valid
|
|
533
461
|
parent_id_col: The column name of the parent option id that the default value belongs to
|
|
534
|
-
|
|
462
|
+
connection: Name of the connection to use defined in connections.py
|
|
535
463
|
"""
|
|
536
464
|
super().__init__(
|
|
537
465
|
table_or_query, min_value_col, max_value_col, increment_col=increment_col, id_col=id_col, from_seeds=from_seeds,
|
|
538
|
-
user_group_col=user_group_col, parent_id_col=parent_id_col,
|
|
466
|
+
user_group_col=user_group_col, parent_id_col=parent_id_col, connection=connection
|
|
539
467
|
)
|
|
540
468
|
self._default_lower_value_col = default_lower_value_col
|
|
541
469
|
self._default_upper_value_col = default_upper_value_col
|
|
542
470
|
|
|
543
|
-
def _convert(self, ds_param: _pc.DataSourceParameterConfig, df:
|
|
471
|
+
def _convert(self, ds_param: _pc.DataSourceParameterConfig, df: _pl.DataFrame) -> _pc.NumberRangeParameterConfig:
|
|
544
472
|
"""
|
|
545
473
|
Method to convert the associated DataSourceParameter into a NumberRangeParameterConfig
|
|
546
474
|
|
|
@@ -556,15 +484,17 @@ class NumberRangeDataSource(_NumericDataSource):
|
|
|
556
484
|
columns = [self._min_value_col, self._max_value_col, self._increment_col, self._default_lower_value_col, self._default_upper_value_col]
|
|
557
485
|
df_agg = self._get_aggregated_df(df, columns)
|
|
558
486
|
|
|
559
|
-
records = df_agg.to_dict("
|
|
487
|
+
records = df_agg.to_pandas().to_dict("records")
|
|
560
488
|
options = tuple(
|
|
561
|
-
_po.NumberRangeParameterOption(
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
489
|
+
_po.NumberRangeParameterOption(
|
|
490
|
+
record[self._min_value_col], record[self._max_value_col],
|
|
491
|
+
increment=self._get_key_from_record(self._increment_col, record, 1),
|
|
492
|
+
default_lower_value=self._get_key_from_record(self._default_lower_value_col, record, None),
|
|
493
|
+
default_upper_value=self._get_key_from_record(self._default_upper_value_col, record, None),
|
|
494
|
+
user_groups=self._get_key_from_record_as_list(self._user_group_col, record),
|
|
495
|
+
parent_option_ids=self._get_key_from_record_as_list(self._parent_id_col, record)
|
|
496
|
+
)
|
|
497
|
+
for record in records
|
|
568
498
|
)
|
|
569
499
|
return _pc.NumberRangeParameterConfig(
|
|
570
500
|
ds_param.name, ds_param.label, options, description=ds_param.description, user_attribute=ds_param.user_attribute,
|
|
@@ -581,7 +511,7 @@ class TextDataSource(DataSource):
|
|
|
581
511
|
|
|
582
512
|
def __init__(
|
|
583
513
|
self, table_or_query: str, default_text_col: str, *, id_col: str | None = None, from_seeds: bool = False,
|
|
584
|
-
user_group_col: str | None = None, parent_id_col: str | None = None,
|
|
514
|
+
user_group_col: str | None = None, parent_id_col: str | None = None, connection: str | None = None,
|
|
585
515
|
**kwargs
|
|
586
516
|
) -> None:
|
|
587
517
|
"""
|
|
@@ -594,15 +524,15 @@ class TextDataSource(DataSource):
|
|
|
594
524
|
from_seeds: Boolean for whether this datasource is created from seeds
|
|
595
525
|
user_group_col: The column name of the user group that the user is in for this option to be valid
|
|
596
526
|
parent_id_col: The column name of the parent option id that the default date belongs to
|
|
597
|
-
|
|
527
|
+
connection: Name of the connection to use defined in connections.py
|
|
598
528
|
"""
|
|
599
529
|
super().__init__(
|
|
600
530
|
table_or_query, id_col=id_col, from_seeds=from_seeds, user_group_col=user_group_col, parent_id_col=parent_id_col,
|
|
601
|
-
|
|
531
|
+
connection=connection
|
|
602
532
|
)
|
|
603
533
|
self._default_text_col = default_text_col
|
|
604
534
|
|
|
605
|
-
def _convert(self, ds_param: _pc.DataSourceParameterConfig, df:
|
|
535
|
+
def _convert(self, ds_param: _pc.DataSourceParameterConfig, df: _pl.DataFrame) -> _pc.TextParameterConfig:
|
|
606
536
|
"""
|
|
607
537
|
Method to convert the associated DataSourceParameter into a TextParameterConfig
|
|
608
538
|
|
|
@@ -618,12 +548,14 @@ class TextDataSource(DataSource):
|
|
|
618
548
|
columns = [self._default_text_col]
|
|
619
549
|
df_agg = self._get_aggregated_df(df, columns)
|
|
620
550
|
|
|
621
|
-
records = df_agg.to_dict("
|
|
551
|
+
records = df_agg.to_pandas().to_dict("records")
|
|
622
552
|
options = tuple(
|
|
623
|
-
_po.TextParameterOption(
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
553
|
+
_po.TextParameterOption(
|
|
554
|
+
default_text=str(record[self._default_text_col]),
|
|
555
|
+
user_groups=self._get_key_from_record_as_list(self._user_group_col, record),
|
|
556
|
+
parent_option_ids=self._get_key_from_record_as_list(self._parent_id_col, record)
|
|
557
|
+
)
|
|
558
|
+
for record in records
|
|
627
559
|
)
|
|
628
560
|
return _pc.TextParameterConfig(
|
|
629
561
|
ds_param.name, ds_param.label, options, description=ds_param.description, user_attribute=ds_param.user_attribute,
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
from typing import Callable, Literal
|
|
2
|
+
from dataclasses import dataclass, field
|
|
3
|
+
from functools import cached_property, lru_cache
|
|
4
|
+
import polars as pl
|
|
5
|
+
|
|
6
|
+
from ._model_configs import ModelConfig
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
class DatasetMetadata:
    """
    Metadata of a dataset, derived from the configuration of its target model

    Attributes:
        target_model_config: The model configuration whose "columns" describe the fields of the dataset
    """
    target_model_config: ModelConfig

    @cached_property
    def _json_repr(self) -> dict:
        """
        The JSON-serializable form of the metadata. Built on first access and reused afterwards
        """
        schema_fields = [
            {
                "name": column.name,
                "type": column.type,
                "condition": column.condition,
                "description": column.description,
                "category": column.category.value,
            }
            for column in self.target_model_config.columns
        ]
        return {"schema": {"fields": schema_fields}}

    def to_json(self) -> dict:
        """
        Returns the metadata as a dictionary, with one entry per configured column under "schema" -> "fields"
        """
        return self._json_repr
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass
class DatasetResult(DatasetMetadata):
    """
    The result of a dataset: its metadata together with the dataframe holding its data

    Attributes:
        df: The data of the dataset as a polars DataFrame
        to_json: Cached function (assigned after initialization) that converts the result into a JSON-serializable
            dictionary. Takes the orientation, the tuple of columns to select, the limit, and the offset (in that order)
    """
    df: pl.DataFrame
    to_json: Callable[[str, tuple[str, ...], int, int], dict] = field(init=False)

    def __post_init__(self):
        # The cache wraps the bound method, so each instance gets its own cache keyed only on the call arguments
        with_cache = lru_cache()
        self.to_json = with_cache(self._to_json)

    def _to_json(self, orientation: Literal["records", "rows", "columns"], select: tuple[str, ...], limit: int, offset: int) -> dict:
        """
        Converts the requested slice of the dataframe into a JSON-serializable dictionary

        Arguments:
            orientation: Layout of "data". Use "columns" for a mapping of column name to values, "rows" for a list of
                lists (values ordered like the resulting columns), and anything else for a list of record dictionaries
            select: Names of the columns to keep. All columns are kept if empty
            limit: Maximum number of rows to return. No limit is applied unless this is positive
            offset: Only applied when positive, in which case only rows whose "_row_num" value is greater than this are kept

        Returns:
            A dictionary with keys "schema", "total_num_rows", "data_details", and "data"
        """
        query = self.df.lazy()
        if offset > 0:
            query = query.filter(pl.col("_row_num") > offset)
        if limit > 0:
            query = query.limit(limit)
        if select:
            query = query.select(select)
        page = query.collect()
        column_names = page.columns

        if orientation == "columns":
            data = page.to_dict(as_series=False)
        elif orientation == "rows":
            data = [[record[name] for name in column_names] for record in page.to_dicts()]
        else:
            data = page.to_dicts()

        configured_columns = {col.name: col for col in self.target_model_config.columns}

        def describe(name: str) -> dict:
            # "_row_num" takes precedence over any configured column with the same name
            if name == "_row_num":
                return {"name": "_row_num", "type": "integer", "description": "The row number of the dataset (starts at 1)", "category": "misc"}
            if name not in configured_columns:
                return {"name": name, "type": "unknown", "description": "", "category": "misc"}
            details = configured_columns[name]
            return {
                "name": name,
                "type": details.type,
                "description": details.description,
                "category": details.category.value,
            }

        fields = [describe(name) for name in column_names]

        # "total_num_rows" counts the full dataframe while "num_rows" counts only the rows being returned
        total_num_rows = self.df.select(pl.len()).item()
        num_rows = page.select(pl.len()).item()
        return {
            "schema": {"fields": fields},
            "total_num_rows": total_num_rows,
            "data_details": {"num_rows": num_rows, "orientation": orientation},
            "data": data,
        }
|
squirrels/dateutils.py
CHANGED
|
@@ -5,7 +5,7 @@ from dateutil.relativedelta import relativedelta
|
|
|
5
5
|
from abc import ABCMeta, abstractmethod
|
|
6
6
|
from enum import Enum
|
|
7
7
|
|
|
8
|
-
from . import _utils as
|
|
8
|
+
from . import _utils as u
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
class DayOfWeek(Enum):
|
|
@@ -62,7 +62,7 @@ class _DayIdxOfCalendarUnit(DateModifier):
|
|
|
62
62
|
super().__init__()
|
|
63
63
|
self.idx = idx
|
|
64
64
|
if self.idx == 0:
|
|
65
|
-
raise
|
|
65
|
+
raise u.ConfigurationError(f"For constructors of class names that start with DayIdxOf_, idx cannot be zero")
|
|
66
66
|
self.incr = self.idx - 1 if self.idx > 0 else self.idx
|
|
67
67
|
|
|
68
68
|
|
|
@@ -84,7 +84,7 @@ class DayIdxOfMonthsCycle(_DayIdxOfCalendarUnit):
|
|
|
84
84
|
self._num_months_in_cycle = num_months_in_cycle
|
|
85
85
|
self._first_month_of_cycle = first_month_of_cycle
|
|
86
86
|
if 12 % self._num_months_in_cycle != 0:
|
|
87
|
-
raise
|
|
87
|
+
raise u.ConfigurationError(f"Value X must fit evenly in 12")
|
|
88
88
|
self.first_month_of_first_cycle = (self._first_month_of_cycle.value - 1) % self._num_months_in_cycle + 1
|
|
89
89
|
|
|
90
90
|
def modify(self, date: Date) -> Date:
|
|
@@ -302,7 +302,7 @@ class DateModPipeline(DateModifier):
|
|
|
302
302
|
"""
|
|
303
303
|
assert isinstance(step, _OffsetUnits)
|
|
304
304
|
if step.offset == 0:
|
|
305
|
-
raise
|
|
305
|
+
raise u.ConfigurationError(f"The length of 'step' must not be zero")
|
|
306
306
|
|
|
307
307
|
output: Sequence[Date] = []
|
|
308
308
|
end_date = self.modify(start_date)
|