squirrels 0.2.1__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of squirrels might be problematic. Click here for more details.
- squirrels/__init__.py +11 -4
- squirrels/_api_response_models.py +118 -0
- squirrels/_api_server.py +140 -75
- squirrels/_authenticator.py +10 -8
- squirrels/_command_line.py +17 -11
- squirrels/_connection_set.py +2 -2
- squirrels/_constants.py +13 -5
- squirrels/_initializer.py +23 -13
- squirrels/_manifest.py +20 -10
- squirrels/_models.py +303 -148
- squirrels/_parameter_configs.py +195 -57
- squirrels/_parameter_sets.py +14 -17
- squirrels/_py_module.py +2 -4
- squirrels/_seeds.py +38 -0
- squirrels/_utils.py +41 -33
- squirrels/arguments/run_time_args.py +76 -34
- squirrels/data_sources.py +172 -51
- squirrels/dateutils.py +3 -3
- squirrels/package_data/assets/index.js +14 -14
- squirrels/package_data/base_project/connections.yml +1 -1
- squirrels/package_data/base_project/database/expenses.db +0 -0
- squirrels/package_data/base_project/docker/Dockerfile +1 -1
- squirrels/package_data/base_project/environcfg.yml +7 -7
- squirrels/package_data/base_project/models/dbviews/database_view1.py +25 -14
- squirrels/package_data/base_project/models/dbviews/database_view1.sql +21 -14
- squirrels/package_data/base_project/models/federates/dataset_example.py +6 -5
- squirrels/package_data/base_project/models/federates/dataset_example.sql +1 -1
- squirrels/package_data/base_project/parameters.yml +57 -28
- squirrels/package_data/base_project/pyconfigs/auth.py +11 -10
- squirrels/package_data/base_project/pyconfigs/connections.py +6 -8
- squirrels/package_data/base_project/pyconfigs/context.py +49 -33
- squirrels/package_data/base_project/pyconfigs/parameters.py +62 -30
- squirrels/package_data/base_project/seeds/seed_categories.csv +6 -0
- squirrels/package_data/base_project/seeds/seed_subcategories.csv +15 -0
- squirrels/package_data/base_project/squirrels.yml.j2 +37 -20
- squirrels/parameter_options.py +30 -10
- squirrels/parameters.py +300 -70
- squirrels/user_base.py +3 -13
- squirrels-0.3.0.dist-info/LICENSE +201 -0
- {squirrels-0.2.1.dist-info → squirrels-0.3.0.dist-info}/METADATA +15 -15
- squirrels-0.3.0.dist-info/RECORD +56 -0
- {squirrels-0.2.1.dist-info → squirrels-0.3.0.dist-info}/WHEEL +1 -1
- squirrels/package_data/base_project/seeds/mocks/category.csv +0 -3
- squirrels/package_data/base_project/seeds/mocks/max_filter.csv +0 -2
- squirrels/package_data/base_project/seeds/mocks/subcategory.csv +0 -6
- squirrels-0.2.1.dist-info/LICENSE +0 -22
- squirrels-0.2.1.dist-info/RECORD +0 -55
- {squirrels-0.2.1.dist-info → squirrels-0.3.0.dist-info}/entry_points.txt +0 -0
squirrels/__init__.py
CHANGED
|
@@ -1,8 +1,15 @@
|
|
|
1
|
-
__version__ = '0.
|
|
1
|
+
__version__ = '0.3.0'
|
|
2
2
|
|
|
3
3
|
from .arguments.init_time_args import ConnectionsArgs, ParametersArgs
|
|
4
4
|
from .arguments.run_time_args import AuthArgs, ContextArgs, ModelDepsArgs, ModelArgs
|
|
5
|
-
|
|
6
|
-
from .
|
|
7
|
-
from .
|
|
5
|
+
|
|
6
|
+
from .parameter_options import SelectParameterOption, DateParameterOption, DateRangeParameterOption
|
|
7
|
+
from .parameter_options import NumberParameterOption, NumberRangeParameterOption, TextParameterOption
|
|
8
|
+
|
|
9
|
+
from .parameters import SingleSelectParameter, MultiSelectParameter, DateParameter, DateRangeParameter
|
|
10
|
+
from .parameters import NumberParameter, NumberRangeParameter, TextParameter
|
|
11
|
+
|
|
12
|
+
from .data_sources import SingleSelectDataSource, MultiSelectDataSource, SelectDataSource, DateDataSource, DateRangeDataSource
|
|
13
|
+
from .data_sources import NumberDataSource, NumberRangeDataSource, TextDataSource
|
|
14
|
+
|
|
8
15
|
from .user_base import User, WrongPassword
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
from typing import Annotated, Union, Optional
|
|
2
|
+
from pydantic import BaseModel, Field
|
|
3
|
+
from datetime import datetime, date
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class LoginReponse(BaseModel):
|
|
7
|
+
access_token: Annotated[str, Field(examples=["encoded_jwt_token"])]
|
|
8
|
+
token_type: Annotated[str, Field(examples=["bearer"])]
|
|
9
|
+
username: Annotated[str, Field(examples=["johndoe"])]
|
|
10
|
+
expiry_time: datetime
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
## Parameters Response Models
|
|
14
|
+
|
|
15
|
+
class ParameterOptionModel(BaseModel):
|
|
16
|
+
id: str
|
|
17
|
+
label: str
|
|
18
|
+
|
|
19
|
+
class ParameterModelBase(BaseModel):
|
|
20
|
+
widget_type: str
|
|
21
|
+
name: str
|
|
22
|
+
label: str
|
|
23
|
+
description: str
|
|
24
|
+
|
|
25
|
+
class SelectParameterModel(ParameterModelBase):
|
|
26
|
+
options: list[ParameterOptionModel]
|
|
27
|
+
trigger_refresh: bool
|
|
28
|
+
|
|
29
|
+
class SingleSelectParameterModel(SelectParameterModel):
|
|
30
|
+
widget_type: Annotated[str, Field(examples=["single_select"])]
|
|
31
|
+
selected_id: Optional[str]
|
|
32
|
+
|
|
33
|
+
class MultiSelectParameterModel(SelectParameterModel):
|
|
34
|
+
widget_type: Annotated[str, Field(examples=["multi_select"])]
|
|
35
|
+
show_select_all: bool
|
|
36
|
+
is_dropdown: bool
|
|
37
|
+
order_matters: bool
|
|
38
|
+
selected_ids: list[str]
|
|
39
|
+
|
|
40
|
+
class DateParameterModel(ParameterModelBase):
|
|
41
|
+
widget_type: Annotated[str, Field(examples=["date"])]
|
|
42
|
+
selected_date: date
|
|
43
|
+
|
|
44
|
+
class DateRangeParameterModel(ParameterModelBase):
|
|
45
|
+
widget_type: Annotated[str, Field(examples=["date_range"])]
|
|
46
|
+
selected_start_date: date
|
|
47
|
+
selected_end_date: date
|
|
48
|
+
|
|
49
|
+
class NumericParameterModel(ParameterModelBase):
|
|
50
|
+
min_value: Annotated[float, Field(examples=[0])]
|
|
51
|
+
max_value: Annotated[float, Field(examples=[10])]
|
|
52
|
+
increment: Annotated[float, Field(examples=[1])]
|
|
53
|
+
|
|
54
|
+
class NumberParameterModel(NumericParameterModel):
|
|
55
|
+
widget_type: Annotated[str, Field(examples=["number"])]
|
|
56
|
+
selected_value: Annotated[float, Field(examples=[2])]
|
|
57
|
+
|
|
58
|
+
class NumberRangeParameterModel(NumericParameterModel):
|
|
59
|
+
widget_type: Annotated[str, Field(examples=["number_range"])]
|
|
60
|
+
selected_lower_value: Annotated[float, Field(examples=[2])]
|
|
61
|
+
selected_upper_value: Annotated[float, Field(examples=[8])]
|
|
62
|
+
|
|
63
|
+
class TextParameterModel(ParameterModelBase):
|
|
64
|
+
widget_type: Annotated[str, Field(examples=["text"])]
|
|
65
|
+
entered_text: str
|
|
66
|
+
is_textarea: bool
|
|
67
|
+
|
|
68
|
+
class ParametersModel(BaseModel):
|
|
69
|
+
parameters: list[
|
|
70
|
+
Union[
|
|
71
|
+
ParameterModelBase, SingleSelectParameterModel, MultiSelectParameterModel, DateParameterModel, DateRangeParameterModel,
|
|
72
|
+
NumberParameterModel, NumberRangeParameterModel, TextParameterModel
|
|
73
|
+
]
|
|
74
|
+
]
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
## Dataset Results Response Models
|
|
78
|
+
|
|
79
|
+
class ColumnModel(BaseModel):
|
|
80
|
+
name: Annotated[str, Field(examples=["mycol"])]
|
|
81
|
+
type: str
|
|
82
|
+
|
|
83
|
+
class SchemaModel(BaseModel):
|
|
84
|
+
fields: list[ColumnModel]
|
|
85
|
+
dimensions: Annotated[list[str], Field(examples=[["mycol"]])]
|
|
86
|
+
|
|
87
|
+
class DatasetResultModel(BaseModel):
|
|
88
|
+
data_schema: Annotated[SchemaModel, Field(alias='schema')]
|
|
89
|
+
data: Annotated[list[dict], Field(examples=[[{"mycol": "myval"}]])]
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
## Catalog Response Models
|
|
93
|
+
|
|
94
|
+
class ProjectVersionModel(BaseModel):
|
|
95
|
+
major_version: int
|
|
96
|
+
minor_versions: list[int]
|
|
97
|
+
token_path: Annotated[str, Field(examples=["/squirrels-v0/myproject/v1/token"])]
|
|
98
|
+
datasets_path: Annotated[str, Field(examples=["/squirrels-v0/myproject/v1/datasets"])]
|
|
99
|
+
|
|
100
|
+
class ProjectModel(BaseModel):
|
|
101
|
+
name: Annotated[str, Field(examples=["myproject"])]
|
|
102
|
+
label: Annotated[str, Field(examples=["My Project"])]
|
|
103
|
+
versions: list[ProjectVersionModel]
|
|
104
|
+
|
|
105
|
+
class CatalogModel(BaseModel):
|
|
106
|
+
projects: list[ProjectModel]
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
## Datasets Catalog Response Models
|
|
110
|
+
|
|
111
|
+
class DatasetInfoModel(BaseModel):
|
|
112
|
+
name: Annotated[str, Field(examples=["mydataset"])]
|
|
113
|
+
label: Annotated[str, Field(examples=["My Dataset"])]
|
|
114
|
+
parameters_path: Annotated[str, Field(examples=["/squirrels-v0/myproject/v1/dataset/mydataset/parameters"])]
|
|
115
|
+
result_path: Annotated[str, Field(examples=["/squirrels-v0/myproject/v1/dataset/mydataset"])]
|
|
116
|
+
|
|
117
|
+
class DatasetsCatalogModel(BaseModel):
|
|
118
|
+
datasets: list[DatasetInfoModel]
|
squirrels/_api_server.py
CHANGED
|
@@ -1,16 +1,20 @@
|
|
|
1
|
-
from typing import
|
|
1
|
+
from typing import Iterable, Optional, Mapping, Callable, Coroutine, TypeVar, Annotated, Any
|
|
2
|
+
from dataclasses import make_dataclass, asdict
|
|
2
3
|
from fastapi import Depends, FastAPI, Request, HTTPException, Response, status
|
|
3
4
|
from fastapi.responses import HTMLResponse, JSONResponse
|
|
4
5
|
from fastapi.templating import Jinja2Templates
|
|
5
6
|
from fastapi.staticfiles import StaticFiles
|
|
6
7
|
from fastapi.security import OAuth2PasswordBearer, OAuth2PasswordRequestForm
|
|
7
8
|
from fastapi.middleware.cors import CORSMiddleware
|
|
9
|
+
from pydantic import create_model, BaseModel
|
|
8
10
|
from cachetools import TTLCache
|
|
9
|
-
|
|
11
|
+
from pandas.api import types as pd_types
|
|
12
|
+
import os, mimetypes, traceback, json, pandas as pd
|
|
10
13
|
|
|
11
|
-
from . import _constants as c, _utils as u
|
|
14
|
+
from . import _constants as c, _utils as u, _api_response_models as arm
|
|
12
15
|
from ._version import sq_major_version
|
|
13
16
|
from ._manifest import ManifestIO
|
|
17
|
+
from ._parameter_sets import ParameterConfigsSetIO
|
|
14
18
|
from ._authenticator import User, Authenticator
|
|
15
19
|
from ._timer import timer, time
|
|
16
20
|
from ._parameter_sets import ParameterSet
|
|
@@ -19,17 +23,42 @@ from ._models import ModelsIO
|
|
|
19
23
|
mimetypes.add_type('application/javascript', '.js')
|
|
20
24
|
|
|
21
25
|
|
|
26
|
+
def df_to_api_response0(df: pd.DataFrame, dimensions: list[str] = None) -> arm.DatasetResultModel:
|
|
27
|
+
"""
|
|
28
|
+
Convert a pandas DataFrame to the response format that the dataset result API of Squirrels outputs.
|
|
29
|
+
|
|
30
|
+
Parameters:
|
|
31
|
+
df: The dataframe to convert into an API response
|
|
32
|
+
dimensions: The list of declared dimensions. If None, all non-numeric columns are assumed as dimensions
|
|
33
|
+
|
|
34
|
+
Returns:
|
|
35
|
+
The response of a Squirrels dataset result API
|
|
36
|
+
"""
|
|
37
|
+
in_df_json = json.loads(df.to_json(orient='table', index=False))
|
|
38
|
+
out_fields = []
|
|
39
|
+
non_numeric_fields = []
|
|
40
|
+
for in_column in in_df_json["schema"]["fields"]:
|
|
41
|
+
col_name: str = in_column["name"]
|
|
42
|
+
out_column = arm.ColumnModel(name=col_name, type=in_column["type"])
|
|
43
|
+
out_fields.append(out_column)
|
|
44
|
+
|
|
45
|
+
if not pd_types.is_numeric_dtype(df[col_name].dtype):
|
|
46
|
+
non_numeric_fields.append(col_name)
|
|
47
|
+
|
|
48
|
+
out_dimensions = non_numeric_fields if dimensions is None else dimensions
|
|
49
|
+
out_schema = arm.SchemaModel(fields=out_fields, dimensions=out_dimensions)
|
|
50
|
+
return arm.DatasetResultModel(schema=out_schema, data=in_df_json["data"])
|
|
51
|
+
|
|
52
|
+
|
|
22
53
|
class ApiServer:
|
|
23
|
-
def __init__(self, no_cache: bool
|
|
54
|
+
def __init__(self, no_cache: bool) -> None:
|
|
24
55
|
"""
|
|
25
56
|
Constructor for ApiServer
|
|
26
57
|
|
|
27
58
|
Parameters:
|
|
28
59
|
no_cache (bool): Whether to disable caching
|
|
29
|
-
debug (bool): Set to True to show "hidden" parameters in the /parameters endpoint response
|
|
30
60
|
"""
|
|
31
61
|
self.no_cache = no_cache
|
|
32
|
-
self.debug = debug
|
|
33
62
|
self.dataset_configs = ManifestIO.obj.datasets
|
|
34
63
|
|
|
35
64
|
token_expiry_minutes = ManifestIO.obj.settings.get(c.AUTH_TOKEN_EXPIRE_SETTING, 30)
|
|
@@ -126,8 +155,13 @@ class ApiServer:
|
|
|
126
155
|
# Changing selections into a cachable "frozenset" that will later be converted to dictionary
|
|
127
156
|
selections = set()
|
|
128
157
|
for key, val in params.items():
|
|
129
|
-
if
|
|
130
|
-
|
|
158
|
+
if val is None:
|
|
159
|
+
continue
|
|
160
|
+
if isinstance(val, (list, tuple)):
|
|
161
|
+
if len(val) == 1: # for backward compatibility
|
|
162
|
+
val = val[0]
|
|
163
|
+
else:
|
|
164
|
+
val = tuple(val)
|
|
131
165
|
selections.add((u.normalize_name(key), val))
|
|
132
166
|
selections = frozenset(selections)
|
|
133
167
|
|
|
@@ -141,25 +175,24 @@ class ApiServer:
|
|
|
141
175
|
cache[cache_key] = result
|
|
142
176
|
return result
|
|
143
177
|
|
|
178
|
+
def get_dataset_from_request_path(request: Request, section: int) -> str:
|
|
179
|
+
url_path: str = request.scope['route'].path
|
|
180
|
+
return url_path.split('/')[section]
|
|
181
|
+
|
|
144
182
|
# Login
|
|
145
183
|
token_path = base_path + '/token'
|
|
146
184
|
|
|
147
185
|
oauth2_scheme = OAuth2PasswordBearer(tokenUrl=token_path, auto_error=False)
|
|
148
186
|
|
|
149
187
|
@app.post(token_path)
|
|
150
|
-
async def login_for_access_token(form_data: OAuth2PasswordRequestForm = Depends()):
|
|
188
|
+
async def login_for_access_token(form_data: OAuth2PasswordRequestForm = Depends()) -> arm.LoginReponse:
|
|
151
189
|
user: Optional[User] = self.authenticator.authenticate_user(form_data.username, form_data.password)
|
|
152
190
|
if not user:
|
|
153
191
|
raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED,
|
|
154
192
|
detail="Incorrect username or password",
|
|
155
193
|
headers={"WWW-Authenticate": "Bearer"})
|
|
156
194
|
access_token, expiry = self.authenticator.create_access_token(user)
|
|
157
|
-
return
|
|
158
|
-
"access_token": access_token,
|
|
159
|
-
"token_type": "bearer",
|
|
160
|
-
"username": user.username,
|
|
161
|
-
"expiry_time": expiry
|
|
162
|
-
}
|
|
195
|
+
return arm.LoginReponse(access_token=access_token, token_type="bearer", username=user.username, expiry_time=expiry)
|
|
163
196
|
|
|
164
197
|
async def get_current_user(response: Response, token: str = Depends(oauth2_scheme)) -> Optional[User]:
|
|
165
198
|
user = self.authenticator.get_user_from_token(token)
|
|
@@ -167,11 +200,14 @@ class ApiServer:
|
|
|
167
200
|
response.headers["Applied-Username"] = username
|
|
168
201
|
return user
|
|
169
202
|
|
|
170
|
-
# Parameters API
|
|
203
|
+
# Parameters API Helpers
|
|
171
204
|
parameters_path = base_path + '/dataset/{dataset}/parameters'
|
|
172
205
|
|
|
206
|
+
def get_dataset_for_parameters_request(request: Request) -> str:
|
|
207
|
+
return get_dataset_from_request_path(request, -2)
|
|
208
|
+
|
|
173
209
|
parameters_cache_size = ManifestIO.obj.settings.get(c.PARAMETERS_CACHE_SIZE_SETTING, 1024)
|
|
174
|
-
parameters_cache_ttl = ManifestIO.obj.settings.get(c.PARAMETERS_CACHE_TTL_SETTING,
|
|
210
|
+
parameters_cache_ttl = ManifestIO.obj.settings.get(c.PARAMETERS_CACHE_TTL_SETTING, 60)
|
|
175
211
|
|
|
176
212
|
async def get_parameters_helper(
|
|
177
213
|
user: Optional[User], dataset: str, selections: Iterable[tuple[str, str]], request_version: Optional[int]
|
|
@@ -187,33 +223,21 @@ class ApiServer:
|
|
|
187
223
|
async def get_parameters_cachable(*args) -> T:
|
|
188
224
|
return await do_cachable_action(params_cache, get_parameters_helper, *args)
|
|
189
225
|
|
|
190
|
-
async def get_parameters_definition(dataset: str, user: Optional[User], headers: Mapping, params: Mapping):
|
|
226
|
+
async def get_parameters_definition(dataset: str, user: Optional[User], headers: Mapping, params: Mapping) -> arm.ParametersModel:
|
|
191
227
|
api_function = get_parameters_helper if self.no_cache else get_parameters_cachable
|
|
192
228
|
result = await apply_dataset_api_function(api_function, user, dataset, headers, params)
|
|
193
229
|
return process_based_on_response_version_header(headers, {
|
|
194
|
-
0: result.
|
|
230
|
+
0: result.to_api_response_model0
|
|
195
231
|
})
|
|
196
|
-
|
|
197
|
-
@app.get(parameters_path, response_class=JSONResponse)
|
|
198
|
-
async def get_parameters(dataset: str, request: Request, user: Optional[User] = Depends(get_current_user)):
|
|
199
|
-
start = time.time()
|
|
200
|
-
result = await get_parameters_definition(dataset, user, request.headers, request.query_params)
|
|
201
|
-
timer.add_activity_time("GET REQUEST total time for PARAMETERS", start)
|
|
202
|
-
return result
|
|
203
|
-
|
|
204
|
-
@app.post(parameters_path, response_class=JSONResponse)
|
|
205
|
-
async def get_parameters_with_post(dataset: str, request: Request, user: Optional[User] = Depends(get_current_user)):
|
|
206
|
-
start = time.time()
|
|
207
|
-
request_body = await request.json()
|
|
208
|
-
result = await get_parameters_definition(dataset, user, request.headers, request_body)
|
|
209
|
-
timer.add_activity_time("POST REQUEST total time for PARAMETERS", start)
|
|
210
|
-
return result
|
|
211
232
|
|
|
212
|
-
# Results API
|
|
233
|
+
# Results API Helpers
|
|
213
234
|
results_path = base_path + '/dataset/{dataset}'
|
|
214
235
|
|
|
236
|
+
def get_dataset_for_results_request(request: Request) -> str:
|
|
237
|
+
return get_dataset_from_request_path(request, -1)
|
|
238
|
+
|
|
215
239
|
results_cache_size = ManifestIO.obj.settings.get(c.RESULTS_CACHE_SIZE_SETTING, 128)
|
|
216
|
-
results_cache_ttl = ManifestIO.obj.settings.get(c.RESULTS_CACHE_TTL_SETTING,
|
|
240
|
+
results_cache_ttl = ManifestIO.obj.settings.get(c.RESULTS_CACHE_TTL_SETTING, 60)
|
|
217
241
|
|
|
218
242
|
async def get_results_helper(
|
|
219
243
|
user: Optional[User], dataset: str, selections: Iterable[tuple[str, str]], request_version: Optional[int]
|
|
@@ -224,70 +248,111 @@ class ApiServer:
|
|
|
224
248
|
|
|
225
249
|
results_cache = TTLCache(maxsize=results_cache_size, ttl=results_cache_ttl*60)
|
|
226
250
|
|
|
227
|
-
async def get_results_cachable(*args):
|
|
251
|
+
async def get_results_cachable(*args) -> pd.DataFrame:
|
|
228
252
|
return await do_cachable_action(results_cache, get_results_helper, *args)
|
|
229
253
|
|
|
230
|
-
async def get_results_definition(dataset: str, user: Optional[User], headers: Mapping, params: Mapping):
|
|
254
|
+
async def get_results_definition(dataset: str, user: Optional[User], headers: Mapping, params: Mapping) -> arm.DatasetResultModel:
|
|
231
255
|
api_function = get_results_helper if self.no_cache else get_results_cachable
|
|
232
256
|
result = await apply_dataset_api_function(api_function, user, dataset, headers, params)
|
|
233
257
|
return process_based_on_response_version_header(headers, {
|
|
234
|
-
0: lambda:
|
|
258
|
+
0: lambda: df_to_api_response0(result)
|
|
235
259
|
})
|
|
236
260
|
|
|
237
|
-
|
|
238
|
-
async def get_results(dataset: str, request: Request, user: Optional[User] = Depends(get_current_user)):
|
|
239
|
-
start = time.time()
|
|
240
|
-
result = await get_results_definition(dataset, user, request.headers, request.query_params)
|
|
241
|
-
timer.add_activity_time("GET REQUEST total time for DATASET", start)
|
|
242
|
-
return result
|
|
261
|
+
param_fields = ParameterConfigsSetIO.obj.get_all_api_field_info()
|
|
243
262
|
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
263
|
+
# Dataset Parameters and Results APIs
|
|
264
|
+
for dataset_name, dataset_cfg in self.dataset_configs.items():
|
|
265
|
+
dataset_normalized = u.normalize_name_for_api(dataset_name)
|
|
266
|
+
curr_parameters_path = parameters_path.format(dataset=dataset_normalized)
|
|
267
|
+
curr_results_path = results_path.format(dataset=dataset_normalized)
|
|
268
|
+
|
|
269
|
+
QueryModelGet = make_dataclass("QueryParams", [
|
|
270
|
+
param_fields[param].as_query_info() for param in dataset_cfg.parameters
|
|
271
|
+
])
|
|
272
|
+
AnnotatedQueryModel = Annotated[QueryModelGet, Depends()]
|
|
273
|
+
|
|
274
|
+
QueryModelPost = create_model("RequestBodyParams", **{
|
|
275
|
+
param: param_fields[param].as_body_info() for param in dataset_cfg.parameters
|
|
276
|
+
})
|
|
277
|
+
|
|
278
|
+
@app.get(curr_parameters_path, response_class=JSONResponse)
|
|
279
|
+
async def get_parameters(
|
|
280
|
+
request: Request, params: AnnotatedQueryModel, user: Optional[User] = Depends(get_current_user) # type: ignore
|
|
281
|
+
) -> arm.ParametersModel:
|
|
282
|
+
start = time.time()
|
|
283
|
+
dataset = get_dataset_for_parameters_request(request)
|
|
284
|
+
result = await get_parameters_definition(dataset, user, request.headers, asdict(params))
|
|
285
|
+
timer.add_activity_time("GET REQUEST total time for PARAMETERS endpoint", start)
|
|
286
|
+
return result
|
|
287
|
+
|
|
288
|
+
@app.post(curr_parameters_path, response_class=JSONResponse)
|
|
289
|
+
async def get_parameters_with_post(
|
|
290
|
+
request: Request, params: QueryModelPost, user: Optional[User] = Depends(get_current_user) # type: ignore
|
|
291
|
+
) -> arm.ParametersModel:
|
|
292
|
+
start = time.time()
|
|
293
|
+
dataset = get_dataset_for_parameters_request(request)
|
|
294
|
+
params: BaseModel = params
|
|
295
|
+
result = await get_parameters_definition(dataset, user, request.headers, params.model_dump())
|
|
296
|
+
timer.add_activity_time("POST REQUEST total time for PARAMETERS endpoint", start)
|
|
297
|
+
return result
|
|
298
|
+
|
|
299
|
+
@app.get(curr_results_path, response_class=JSONResponse)
|
|
300
|
+
async def get_results(
|
|
301
|
+
request: Request, params: AnnotatedQueryModel, user: Optional[User] = Depends(get_current_user) # type: ignore
|
|
302
|
+
) -> arm.DatasetResultModel:
|
|
303
|
+
start = time.time()
|
|
304
|
+
dataset = get_dataset_for_results_request(request)
|
|
305
|
+
result = await get_results_definition(dataset, user, request.headers, asdict(params))
|
|
306
|
+
timer.add_activity_time("GET REQUEST total time for DATASET endpoint", start)
|
|
307
|
+
return result
|
|
308
|
+
|
|
309
|
+
@app.post(curr_results_path, response_class=JSONResponse)
|
|
310
|
+
async def get_results_with_post(
|
|
311
|
+
request: Request, params: QueryModelPost, user: Optional[User] = Depends(get_current_user) # type: ignore
|
|
312
|
+
) -> arm.DatasetResultModel:
|
|
313
|
+
start = time.time()
|
|
314
|
+
dataset = get_dataset_for_results_request(request)
|
|
315
|
+
params: BaseModel = params
|
|
316
|
+
result = await get_results_definition(dataset, user, request.headers, params.model_dump())
|
|
317
|
+
timer.add_activity_time("POST REQUEST total time for DATASET endpoint", start)
|
|
318
|
+
return result
|
|
251
319
|
|
|
252
320
|
# Datasets Catalog API
|
|
253
321
|
datasets_path = base_path + '/datasets'
|
|
254
322
|
|
|
255
|
-
def get_datasets0(user: Optional[User]):
|
|
323
|
+
def get_datasets0(user: Optional[User]) -> arm.DatasetsCatalogModel:
|
|
256
324
|
datasets_info = []
|
|
257
325
|
for dataset_name, dataset_config in self.dataset_configs.items():
|
|
258
326
|
if can_user_access_dataset(user, dataset_name):
|
|
259
327
|
dataset_normalized = u.normalize_name_for_api(dataset_name)
|
|
260
|
-
datasets_info.append(
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
return {"datasets": datasets_info}
|
|
328
|
+
datasets_info.append(arm.DatasetInfoModel(
|
|
329
|
+
name=dataset_name, label=dataset_config.label,
|
|
330
|
+
parameters_path=parameters_path.format(dataset=dataset_normalized),
|
|
331
|
+
result_path=results_path.format(dataset=dataset_normalized)
|
|
332
|
+
))
|
|
333
|
+
return arm.DatasetsCatalogModel(datasets=datasets_info)
|
|
267
334
|
|
|
268
335
|
@app.get(datasets_path)
|
|
269
|
-
def get_datasets(request: Request, user: Optional[User] = Depends(get_current_user)):
|
|
336
|
+
def get_datasets(request: Request, user: Optional[User] = Depends(get_current_user)) -> arm.DatasetsCatalogModel:
|
|
270
337
|
return process_based_on_response_version_header(request.headers, {
|
|
271
338
|
0: lambda: get_datasets0(user)
|
|
272
339
|
})
|
|
273
340
|
|
|
274
341
|
# Projects Catalog API
|
|
275
|
-
def get_catalog0():
|
|
276
|
-
return
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
}]
|
|
287
|
-
}
|
|
342
|
+
def get_catalog0() -> arm.CatalogModel:
|
|
343
|
+
return arm.CatalogModel(projects=[arm.ProjectModel(
|
|
344
|
+
name=ManifestIO.obj.project_variables.get_name(),
|
|
345
|
+
label=ManifestIO.obj.project_variables.get_label(),
|
|
346
|
+
versions=[arm.ProjectVersionModel(
|
|
347
|
+
major_version=ManifestIO.obj.project_variables.get_major_version(),
|
|
348
|
+
minor_versions=[0],
|
|
349
|
+
token_path=token_path,
|
|
350
|
+
datasets_path=datasets_path
|
|
351
|
+
)]
|
|
352
|
+
)])
|
|
288
353
|
|
|
289
354
|
@app.get(squirrels_version_path, response_class=JSONResponse)
|
|
290
|
-
async def get_catalog(request: Request):
|
|
355
|
+
async def get_catalog(request: Request) -> arm.CatalogModel:
|
|
291
356
|
return process_based_on_response_version_header(request.headers, {
|
|
292
357
|
0: lambda: get_catalog0()
|
|
293
358
|
})
|
squirrels/_authenticator.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from typing import Optional
|
|
2
2
|
from datetime import datetime, timedelta, timezone
|
|
3
|
-
from
|
|
4
|
-
import secrets
|
|
3
|
+
from jwt.exceptions import InvalidTokenError
|
|
4
|
+
import secrets, jwt
|
|
5
5
|
|
|
6
6
|
from . import _utils as u, _constants as c
|
|
7
7
|
from .arguments.run_time_args import AuthArgs
|
|
@@ -16,7 +16,7 @@ class Authenticator:
|
|
|
16
16
|
|
|
17
17
|
@classmethod
|
|
18
18
|
def get_auth_helper(cls, default_auth_helper = None):
|
|
19
|
-
auth_module_path = u.join_paths(c.
|
|
19
|
+
auth_module_path = u.join_paths(c.PYCONFIGS_FOLDER, c.AUTH_FILE)
|
|
20
20
|
return PyModule(auth_module_path, default_class=default_auth_helper)
|
|
21
21
|
|
|
22
22
|
def __init__(self, token_expiry_minutes: int, auth_helper = None) -> None:
|
|
@@ -47,15 +47,17 @@ class Authenticator:
|
|
|
47
47
|
if not isinstance(real_user, WrongPassword):
|
|
48
48
|
fake_users = EnvironConfigIO.obj.get_users()
|
|
49
49
|
if username in fake_users and secrets.compare_digest(fake_users[username][c.USER_PWD_KEY], password):
|
|
50
|
-
|
|
50
|
+
fake_user = fake_users[username].copy()
|
|
51
|
+
fake_user.pop("username", "")
|
|
52
|
+
is_internal = fake_user.pop("is_internal", False)
|
|
51
53
|
try:
|
|
52
|
-
return user_cls.Create(username,
|
|
54
|
+
return user_cls.Create(username, is_internal=is_internal, **fake_user)
|
|
53
55
|
except Exception as e:
|
|
54
56
|
raise u.FileExecutionError(f'Failed to create user from User model in {c.AUTH_FILE}', e)
|
|
55
57
|
|
|
56
58
|
return None
|
|
57
59
|
|
|
58
|
-
def create_access_token(self, user: User) -> str:
|
|
60
|
+
def create_access_token(self, user: User) -> tuple[str, datetime]:
|
|
59
61
|
expire = datetime.now(timezone.utc) + timedelta(minutes=self.token_expiry_minutes)
|
|
60
62
|
to_encode = {**vars(user), "exp": expire}
|
|
61
63
|
encoded_jwt = jwt.encode(to_encode, self.secret_key, algorithm=self.algorithm)
|
|
@@ -64,11 +66,11 @@ class Authenticator:
|
|
|
64
66
|
def get_user_from_token(self, token: Optional[str]) -> Optional[User]:
|
|
65
67
|
if token is not None:
|
|
66
68
|
try:
|
|
67
|
-
payload = jwt.decode(token, self.secret_key, algorithms=[self.algorithm])
|
|
69
|
+
payload: dict = jwt.decode(token, self.secret_key, algorithms=[self.algorithm])
|
|
68
70
|
payload.pop("exp")
|
|
69
71
|
user_cls: User = self.auth_helper.get_func_or_class("User", default_attr=User)
|
|
70
72
|
return user_cls._FromDict(payload)
|
|
71
|
-
except
|
|
73
|
+
except InvalidTokenError:
|
|
72
74
|
return None
|
|
73
75
|
|
|
74
76
|
def can_user_access_scope(self, user: Optional[User], scope: DatasetScope) -> bool:
|
squirrels/_command_line.py
CHANGED
|
@@ -31,18 +31,22 @@ def main():
|
|
|
31
31
|
module_parser = subparsers.add_parser(c.DEPS_CMD, help=f'Load all packages specified in {c.MANIFEST_FILE} (from git)', add_help=False)
|
|
32
32
|
module_parser.add_argument('-h', '--help', action="help", help="Show this help message and exit")
|
|
33
33
|
|
|
34
|
-
compile_parser = subparsers.add_parser(c.COMPILE_CMD, help='Create
|
|
34
|
+
compile_parser = subparsers.add_parser(c.COMPILE_CMD, help='Create rendered SQL files in the folder "./target/compile"', add_help=False)
|
|
35
|
+
compile_dataset_group = compile_parser.add_mutually_exclusive_group(required=True)
|
|
36
|
+
compile_test_set_group = compile_parser.add_mutually_exclusive_group(required=False)
|
|
35
37
|
compile_parser.add_argument('-h', '--help', action="help", help="Show this help message and exit")
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
38
|
+
|
|
39
|
+
compile_dataset_group.add_argument('-d', '--dataset', type=str, help="Select dataset to use for dataset traits. Is required, unless using --all-datasets")
|
|
40
|
+
compile_dataset_group.add_argument('-D', '--all-datasets', action="store_true", help="Compile models for all datasets. Only required if --dataset is not specified")
|
|
41
|
+
compile_test_set_group.add_argument('-t', '--test-set', type=str, help="The selection test set to use. If not specified, default selections are used, unless using --all-test-sets")
|
|
42
|
+
compile_test_set_group.add_argument('-T', '--all-test-sets', action="store_true", help="Compile models for all selection test sets")
|
|
43
|
+
|
|
44
|
+
compile_parser.add_argument('-s', '--select', type=str, help="Select single model to compile. If not specified, all models for the dataset are compiled. Ignored if using --all-datasets")
|
|
40
45
|
compile_parser.add_argument('-r', '--runquery', action='store_true', help='Runs all target models, and produce the results as csv files')
|
|
41
46
|
|
|
42
|
-
run_parser = subparsers.add_parser(c.RUN_CMD, help='Run the
|
|
47
|
+
run_parser = subparsers.add_parser(c.RUN_CMD, help='Run the API server', add_help=False)
|
|
43
48
|
run_parser.add_argument('-h', '--help', action="help", help="Show this help message and exit")
|
|
44
49
|
run_parser.add_argument('--no-cache', action='store_true', help='Do not cache any api results')
|
|
45
|
-
run_parser.add_argument('--debug', action='store_true', help='Show all "hidden parameters" in the parameters response')
|
|
46
50
|
run_parser.add_argument('--host', type=str, default='127.0.0.1', help="The host to run on")
|
|
47
51
|
run_parser.add_argument('--port', type=int, default=4465, help="The port to run on")
|
|
48
52
|
|
|
@@ -58,6 +62,7 @@ def main():
|
|
|
58
62
|
from ._package_loader import PackageLoaderIO
|
|
59
63
|
from ._connection_set import ConnectionSetIO
|
|
60
64
|
from ._parameter_sets import ParameterConfigsSetIO
|
|
65
|
+
from ._seeds import SeedsIO
|
|
61
66
|
|
|
62
67
|
if args.version:
|
|
63
68
|
print(__version__)
|
|
@@ -68,18 +73,19 @@ def main():
|
|
|
68
73
|
PackageLoaderIO.LoadPackages(reload=True)
|
|
69
74
|
elif args.command in [c.RUN_CMD, c.COMPILE_CMD]:
|
|
70
75
|
ManifestIO.LoadFromFile()
|
|
76
|
+
SeedsIO.LoadFiles()
|
|
71
77
|
ConnectionSetIO.LoadFromFile()
|
|
72
78
|
try:
|
|
73
79
|
ParameterConfigsSetIO.LoadFromFile()
|
|
74
80
|
ModelsIO.LoadFiles()
|
|
75
|
-
|
|
76
81
|
if args.command == c.RUN_CMD:
|
|
77
|
-
server = ApiServer(args.no_cache
|
|
82
|
+
server = ApiServer(args.no_cache)
|
|
78
83
|
server.run(args)
|
|
79
|
-
pass
|
|
80
84
|
elif args.command == c.COMPILE_CMD:
|
|
81
|
-
task = ModelsIO.WriteOutputs(args.dataset, args.
|
|
85
|
+
task = ModelsIO.WriteOutputs(args.dataset, args.all_datasets, args.select, args.test_set, args.all_test_sets, args.runquery)
|
|
82
86
|
asyncio.run(task)
|
|
87
|
+
except KeyboardInterrupt:
|
|
88
|
+
pass
|
|
83
89
|
finally:
|
|
84
90
|
ConnectionSetIO.Dispose()
|
|
85
91
|
elif args.command is None:
|
squirrels/_connection_set.py
CHANGED
|
@@ -29,9 +29,9 @@ class ConnectionSet:
|
|
|
29
29
|
raise u.ConfigurationError(f'Connection name "{conn_name}" was not configured') from e
|
|
30
30
|
return connection_pool
|
|
31
31
|
|
|
32
|
-
def run_sql_query_from_conn_name(self, query: str, conn_name: str) -> pd.DataFrame:
|
|
32
|
+
def run_sql_query_from_conn_name(self, query: str, conn_name: str, placeholders: dict = {}) -> pd.DataFrame:
|
|
33
33
|
engine = self._get_engine(conn_name)
|
|
34
|
-
df = pd.read_sql(query, engine)
|
|
34
|
+
df = pd.read_sql(query, engine, params=placeholders)
|
|
35
35
|
return df
|
|
36
36
|
|
|
37
37
|
def _dispose(self) -> None:
|
squirrels/_constants.py
CHANGED
|
@@ -39,6 +39,7 @@ PARAMETER_ARGS_KEY = 'arguments'
|
|
|
39
39
|
TEST_SETS_KEY = 'selection_test_sets'
|
|
40
40
|
TEST_SET_NAME_KEY = 'name'
|
|
41
41
|
DEFAULT_TEST_SET_NAME = 'default'
|
|
42
|
+
TEST_SET_DATASETS_KEY = 'datasets'
|
|
42
43
|
TEST_SET_USER_ATTR_KEY = 'user_attributes'
|
|
43
44
|
TEST_SET_PARAMETERS_KEY = 'parameters'
|
|
44
45
|
|
|
@@ -48,6 +49,7 @@ DATASET_LABEL_KEY = 'label'
|
|
|
48
49
|
DATASET_MODEL_KEY = 'model'
|
|
49
50
|
DATASET_PARAMETERS_KEY = 'parameters'
|
|
50
51
|
DATASET_TRAITS_KEY = 'traits'
|
|
52
|
+
DATASET_DEFAULT_TEST_SET_KEY = 'default_test_set'
|
|
51
53
|
|
|
52
54
|
DATASET_SCOPE_KEY = 'scope'
|
|
53
55
|
PUBLIC_SCOPE = 'public'
|
|
@@ -94,7 +96,7 @@ FEDERATES_FOLDER = 'federates'
|
|
|
94
96
|
FEDERATE_SQL_NAME = 'dataset_example.sql'
|
|
95
97
|
FEDERATE_PY_NAME = 'dataset_example.py'
|
|
96
98
|
|
|
97
|
-
|
|
99
|
+
PYCONFIGS_FOLDER = 'pyconfigs'
|
|
98
100
|
AUTH_FILE = 'auth.py'
|
|
99
101
|
CONNECTIONS_FILE = 'connections.py'
|
|
100
102
|
CONTEXT_FILE = 'context.py'
|
|
@@ -103,9 +105,9 @@ PARAMETERS_FILE = 'parameters.py'
|
|
|
103
105
|
TARGET_FOLDER = 'target'
|
|
104
106
|
COMPILE_FOLDER = 'compile'
|
|
105
107
|
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
108
|
+
SEEDS_FOLDER = 'seeds'
|
|
109
|
+
CATEGORY_SEED_FILE = 'seed_categories.csv'
|
|
110
|
+
SUBCATEGORY_SEED_FILE = 'seed_subcategories.csv'
|
|
109
111
|
|
|
110
112
|
# Dataset setting names
|
|
111
113
|
AUTH_TOKEN_EXPIRE_SETTING = 'auth.token.expire_minutes'
|
|
@@ -116,6 +118,8 @@ RESULTS_CACHE_TTL_SETTING = 'results.cache.ttl_minutes'
|
|
|
116
118
|
TEST_SET_DEFAULT_USED_SETTING = 'selection_test_sets.default_name_used'
|
|
117
119
|
DB_CONN_DEFAULT_USED_SETTING = 'connections.default_name_used'
|
|
118
120
|
DEFAULT_MATERIALIZE_SETTING = 'defaults.federates.materialized'
|
|
121
|
+
SEEDS_INFER_SCHEMA_SETTING = 'seeds.infer_schema'
|
|
122
|
+
SEEDS_NA_VALUES_SETTING = 'seeds.na_values'
|
|
119
123
|
IN_MEMORY_DB_SETTING = 'in_memory_database'
|
|
120
124
|
SQLITE = 'sqlite'
|
|
121
125
|
DUCKDB = 'duckdb'
|
|
@@ -138,9 +142,13 @@ CONF_FORMAT_CHOICES2 = [(PYTHON_FORMAT2, PYTHON_FORMAT), YML_FORMAT]
|
|
|
138
142
|
|
|
139
143
|
EXPENSES_DB_NAME = 'expenses'
|
|
140
144
|
WEATHER_DB_NAME = 'weather'
|
|
141
|
-
|
|
145
|
+
NO_DB = 'none'
|
|
146
|
+
DATABASE_CHOICES = [EXPENSES_DB_NAME, WEATHER_DB_NAME, NO_DB]
|
|
142
147
|
|
|
143
148
|
# Function names
|
|
144
149
|
GET_USER_FUNC = "get_user_if_valid"
|
|
145
150
|
DEP_FUNC = "dependencies"
|
|
146
151
|
MAIN_FUNC = "main"
|
|
152
|
+
|
|
153
|
+
# Regex
|
|
154
|
+
date_regex = r'^\d{4}\-\d{2}\-\d{2}$'
|