squirrels 0.2.2__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of squirrels might be problematic. Click here for more details.

Files changed (48)
  1. squirrels/__init__.py +11 -4
  2. squirrels/_api_response_models.py +118 -0
  3. squirrels/_api_server.py +140 -75
  4. squirrels/_authenticator.py +10 -8
  5. squirrels/_command_line.py +17 -11
  6. squirrels/_connection_set.py +2 -2
  7. squirrels/_constants.py +13 -5
  8. squirrels/_initializer.py +23 -13
  9. squirrels/_manifest.py +20 -10
  10. squirrels/_models.py +295 -142
  11. squirrels/_parameter_configs.py +195 -57
  12. squirrels/_parameter_sets.py +14 -17
  13. squirrels/_py_module.py +2 -4
  14. squirrels/_seeds.py +38 -0
  15. squirrels/_utils.py +41 -33
  16. squirrels/arguments/run_time_args.py +76 -34
  17. squirrels/data_sources.py +172 -51
  18. squirrels/dateutils.py +3 -3
  19. squirrels/package_data/assets/index.js +14 -14
  20. squirrels/package_data/base_project/connections.yml +1 -1
  21. squirrels/package_data/base_project/database/expenses.db +0 -0
  22. squirrels/package_data/base_project/docker/Dockerfile +1 -1
  23. squirrels/package_data/base_project/environcfg.yml +7 -7
  24. squirrels/package_data/base_project/models/dbviews/database_view1.py +25 -14
  25. squirrels/package_data/base_project/models/dbviews/database_view1.sql +21 -14
  26. squirrels/package_data/base_project/models/federates/dataset_example.py +6 -5
  27. squirrels/package_data/base_project/models/federates/dataset_example.sql +1 -1
  28. squirrels/package_data/base_project/parameters.yml +57 -28
  29. squirrels/package_data/base_project/pyconfigs/auth.py +11 -10
  30. squirrels/package_data/base_project/pyconfigs/connections.py +6 -8
  31. squirrels/package_data/base_project/pyconfigs/context.py +49 -33
  32. squirrels/package_data/base_project/pyconfigs/parameters.py +62 -30
  33. squirrels/package_data/base_project/seeds/seed_categories.csv +6 -0
  34. squirrels/package_data/base_project/seeds/seed_subcategories.csv +15 -0
  35. squirrels/package_data/base_project/squirrels.yml.j2 +37 -20
  36. squirrels/parameter_options.py +30 -10
  37. squirrels/parameters.py +300 -70
  38. squirrels/user_base.py +3 -13
  39. squirrels-0.3.0.dist-info/LICENSE +201 -0
  40. {squirrels-0.2.2.dist-info → squirrels-0.3.0.dist-info}/METADATA +15 -15
  41. squirrels-0.3.0.dist-info/RECORD +56 -0
  42. squirrels/package_data/base_project/seeds/mocks/category.csv +0 -3
  43. squirrels/package_data/base_project/seeds/mocks/max_filter.csv +0 -2
  44. squirrels/package_data/base_project/seeds/mocks/subcategory.csv +0 -6
  45. squirrels-0.2.2.dist-info/LICENSE +0 -22
  46. squirrels-0.2.2.dist-info/RECORD +0 -55
  47. {squirrels-0.2.2.dist-info → squirrels-0.3.0.dist-info}/WHEEL +0 -0
  48. {squirrels-0.2.2.dist-info → squirrels-0.3.0.dist-info}/entry_points.txt +0 -0
squirrels/__init__.py CHANGED
@@ -1,8 +1,15 @@
1
- __version__ = '0.2.2'
1
+ __version__ = '0.3.0'
2
2
 
3
3
  from .arguments.init_time_args import ConnectionsArgs, ParametersArgs
4
4
  from .arguments.run_time_args import AuthArgs, ContextArgs, ModelDepsArgs, ModelArgs
5
- from .parameter_options import SelectParameterOption, DateParameterOption, DateRangeParameterOption, NumberParameterOption, NumberRangeParameterOption
6
- from .parameters import SingleSelectParameter, MultiSelectParameter, DateParameter, DateRangeParameter, NumberParameter, NumberRangeParameter
7
- from .data_sources import SingleSelectDataSource, MultiSelectDataSource, DateDataSource, DateRangeDataSource, NumberDataSource, NumberRangeDataSource
5
+
6
+ from .parameter_options import SelectParameterOption, DateParameterOption, DateRangeParameterOption
7
+ from .parameter_options import NumberParameterOption, NumberRangeParameterOption, TextParameterOption
8
+
9
+ from .parameters import SingleSelectParameter, MultiSelectParameter, DateParameter, DateRangeParameter
10
+ from .parameters import NumberParameter, NumberRangeParameter, TextParameter
11
+
12
+ from .data_sources import SingleSelectDataSource, MultiSelectDataSource, SelectDataSource, DateDataSource, DateRangeDataSource
13
+ from .data_sources import NumberDataSource, NumberRangeDataSource, TextDataSource
14
+
8
15
  from .user_base import User, WrongPassword
squirrels/_api_response_models.py ADDED
@@ -0,0 +1,118 @@
1
+ from typing import Annotated, Union, Optional
2
+ from pydantic import BaseModel, Field
3
+ from datetime import datetime, date
4
+
5
+
6
+ class LoginReponse(BaseModel):
7
+ access_token: Annotated[str, Field(examples=["encoded_jwt_token"])]
8
+ token_type: Annotated[str, Field(examples=["bearer"])]
9
+ username: Annotated[str, Field(examples=["johndoe"])]
10
+ expiry_time: datetime
11
+
12
+
13
+ ## Parameters Response Models
14
+
15
+ class ParameterOptionModel(BaseModel):
16
+ id: str
17
+ label: str
18
+
19
+ class ParameterModelBase(BaseModel):
20
+ widget_type: str
21
+ name: str
22
+ label: str
23
+ description: str
24
+
25
+ class SelectParameterModel(ParameterModelBase):
26
+ options: list[ParameterOptionModel]
27
+ trigger_refresh: bool
28
+
29
+ class SingleSelectParameterModel(SelectParameterModel):
30
+ widget_type: Annotated[str, Field(examples=["single_select"])]
31
+ selected_id: Optional[str]
32
+
33
+ class MultiSelectParameterModel(SelectParameterModel):
34
+ widget_type: Annotated[str, Field(examples=["multi_select"])]
35
+ show_select_all: bool
36
+ is_dropdown: bool
37
+ order_matters: bool
38
+ selected_ids: list[str]
39
+
40
+ class DateParameterModel(ParameterModelBase):
41
+ widget_type: Annotated[str, Field(examples=["date"])]
42
+ selected_date: date
43
+
44
+ class DateRangeParameterModel(ParameterModelBase):
45
+ widget_type: Annotated[str, Field(examples=["date_range"])]
46
+ selected_start_date: date
47
+ selected_end_date: date
48
+
49
+ class NumericParameterModel(ParameterModelBase):
50
+ min_value: Annotated[float, Field(examples=[0])]
51
+ max_value: Annotated[float, Field(examples=[10])]
52
+ increment: Annotated[float, Field(examples=[1])]
53
+
54
+ class NumberParameterModel(NumericParameterModel):
55
+ widget_type: Annotated[str, Field(examples=["number"])]
56
+ selected_value: Annotated[float, Field(examples=[2])]
57
+
58
+ class NumberRangeParameterModel(NumericParameterModel):
59
+ widget_type: Annotated[str, Field(examples=["number_range"])]
60
+ selected_lower_value: Annotated[float, Field(examples=[2])]
61
+ selected_upper_value: Annotated[float, Field(examples=[8])]
62
+
63
+ class TextParameterModel(ParameterModelBase):
64
+ widget_type: Annotated[str, Field(examples=["text"])]
65
+ entered_text: str
66
+ is_textarea: bool
67
+
68
+ class ParametersModel(BaseModel):
69
+ parameters: list[
70
+ Union[
71
+ ParameterModelBase, SingleSelectParameterModel, MultiSelectParameterModel, DateParameterModel, DateRangeParameterModel,
72
+ NumberParameterModel, NumberRangeParameterModel, TextParameterModel
73
+ ]
74
+ ]
75
+
76
+
77
+ ## Dataset Results Response Models
78
+
79
+ class ColumnModel(BaseModel):
80
+ name: Annotated[str, Field(examples=["mycol"])]
81
+ type: str
82
+
83
+ class SchemaModel(BaseModel):
84
+ fields: list[ColumnModel]
85
+ dimensions: Annotated[list[str], Field(examples=[["mycol"]])]
86
+
87
+ class DatasetResultModel(BaseModel):
88
+ data_schema: Annotated[SchemaModel, Field(alias='schema')]
89
+ data: Annotated[list[dict], Field(examples=[[{"mycol": "myval"}]])]
90
+
91
+
92
+ ## Catalog Response Models
93
+
94
+ class ProjectVersionModel(BaseModel):
95
+ major_version: int
96
+ minor_versions: list[int]
97
+ token_path: Annotated[str, Field(examples=["/squirrels-v0/myproject/v1/token"])]
98
+ datasets_path: Annotated[str, Field(examples=["/squirrels-v0/myproject/v1/datasets"])]
99
+
100
+ class ProjectModel(BaseModel):
101
+ name: Annotated[str, Field(examples=["myproject"])]
102
+ label: Annotated[str, Field(examples=["My Project"])]
103
+ versions: list[ProjectVersionModel]
104
+
105
+ class CatalogModel(BaseModel):
106
+ projects: list[ProjectModel]
107
+
108
+
109
+ ## Datasets Catalog Response Models
110
+
111
+ class DatasetInfoModel(BaseModel):
112
+ name: Annotated[str, Field(examples=["mydataset"])]
113
+ label: Annotated[str, Field(examples=["My Dataset"])]
114
+ parameters_path: Annotated[str, Field(examples=["/squirrels-v0/myproject/v1/dataset/mydataset/parameters"])]
115
+ result_path: Annotated[str, Field(examples=["/squirrels-v0/myproject/v1/dataset/mydataset"])]
116
+
117
+ class DatasetsCatalogModel(BaseModel):
118
+ datasets: list[DatasetInfoModel]
squirrels/_api_server.py CHANGED
@@ -1,16 +1,20 @@
1
- from typing import List, Iterable, Optional, Mapping, Callable, Coroutine, TypeVar, Any
1
+ from typing import Iterable, Optional, Mapping, Callable, Coroutine, TypeVar, Annotated, Any
2
+ from dataclasses import make_dataclass, asdict
2
3
  from fastapi import Depends, FastAPI, Request, HTTPException, Response, status
3
4
  from fastapi.responses import HTMLResponse, JSONResponse
4
5
  from fastapi.templating import Jinja2Templates
5
6
  from fastapi.staticfiles import StaticFiles
6
7
  from fastapi.security import OAuth2PasswordBearer, OAuth2PasswordRequestForm
7
8
  from fastapi.middleware.cors import CORSMiddleware
9
+ from pydantic import create_model, BaseModel
8
10
  from cachetools import TTLCache
9
- import os, mimetypes, traceback, pandas as pd
11
+ from pandas.api import types as pd_types
12
+ import os, mimetypes, traceback, json, pandas as pd
10
13
 
11
- from . import _constants as c, _utils as u
14
+ from . import _constants as c, _utils as u, _api_response_models as arm
12
15
  from ._version import sq_major_version
13
16
  from ._manifest import ManifestIO
17
+ from ._parameter_sets import ParameterConfigsSetIO
14
18
  from ._authenticator import User, Authenticator
15
19
  from ._timer import timer, time
16
20
  from ._parameter_sets import ParameterSet
@@ -19,17 +23,42 @@ from ._models import ModelsIO
19
23
  mimetypes.add_type('application/javascript', '.js')
20
24
 
21
25
 
26
+ def df_to_api_response0(df: pd.DataFrame, dimensions: list[str] = None) -> arm.DatasetResultModel:
27
+ """
28
+ Convert a pandas DataFrame to the response format that the dataset result API of Squirrels outputs.
29
+
30
+ Parameters:
31
+ df: The dataframe to convert into an API response
32
+ dimensions: The list of declared dimensions. If None, all non-numeric columns are assumed as dimensions
33
+
34
+ Returns:
35
+ The response of a Squirrels dataset result API
36
+ """
37
+ in_df_json = json.loads(df.to_json(orient='table', index=False))
38
+ out_fields = []
39
+ non_numeric_fields = []
40
+ for in_column in in_df_json["schema"]["fields"]:
41
+ col_name: str = in_column["name"]
42
+ out_column = arm.ColumnModel(name=col_name, type=in_column["type"])
43
+ out_fields.append(out_column)
44
+
45
+ if not pd_types.is_numeric_dtype(df[col_name].dtype):
46
+ non_numeric_fields.append(col_name)
47
+
48
+ out_dimensions = non_numeric_fields if dimensions is None else dimensions
49
+ out_schema = arm.SchemaModel(fields=out_fields, dimensions=out_dimensions)
50
+ return arm.DatasetResultModel(schema=out_schema, data=in_df_json["data"])
51
+
52
+
22
53
  class ApiServer:
23
- def __init__(self, no_cache: bool, debug: bool) -> None:
54
+ def __init__(self, no_cache: bool) -> None:
24
55
  """
25
56
  Constructor for ApiServer
26
57
 
27
58
  Parameters:
28
59
  no_cache (bool): Whether to disable caching
29
- debug (bool): Set to True to show "hidden" parameters in the /parameters endpoint response
30
60
  """
31
61
  self.no_cache = no_cache
32
- self.debug = debug
33
62
  self.dataset_configs = ManifestIO.obj.datasets
34
63
 
35
64
  token_expiry_minutes = ManifestIO.obj.settings.get(c.AUTH_TOKEN_EXPIRE_SETTING, 30)
@@ -126,8 +155,13 @@ class ApiServer:
126
155
  # Changing selections into a cachable "frozenset" that will later be converted to dictionary
127
156
  selections = set()
128
157
  for key, val in params.items():
129
- if isinstance(val, List):
130
- val = tuple(val)
158
+ if val is None:
159
+ continue
160
+ if isinstance(val, (list, tuple)):
161
+ if len(val) == 1: # for backward compatibility
162
+ val = val[0]
163
+ else:
164
+ val = tuple(val)
131
165
  selections.add((u.normalize_name(key), val))
132
166
  selections = frozenset(selections)
133
167
 
@@ -141,25 +175,24 @@ class ApiServer:
141
175
  cache[cache_key] = result
142
176
  return result
143
177
 
178
+ def get_dataset_from_request_path(request: Request, section: int) -> str:
179
+ url_path: str = request.scope['route'].path
180
+ return url_path.split('/')[section]
181
+
144
182
  # Login
145
183
  token_path = base_path + '/token'
146
184
 
147
185
  oauth2_scheme = OAuth2PasswordBearer(tokenUrl=token_path, auto_error=False)
148
186
 
149
187
  @app.post(token_path)
150
- async def login_for_access_token(form_data: OAuth2PasswordRequestForm = Depends()):
188
+ async def login_for_access_token(form_data: OAuth2PasswordRequestForm = Depends()) -> arm.LoginReponse:
151
189
  user: Optional[User] = self.authenticator.authenticate_user(form_data.username, form_data.password)
152
190
  if not user:
153
191
  raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED,
154
192
  detail="Incorrect username or password",
155
193
  headers={"WWW-Authenticate": "Bearer"})
156
194
  access_token, expiry = self.authenticator.create_access_token(user)
157
- return {
158
- "access_token": access_token,
159
- "token_type": "bearer",
160
- "username": user.username,
161
- "expiry_time": expiry
162
- }
195
+ return arm.LoginReponse(access_token=access_token, token_type="bearer", username=user.username, expiry_time=expiry)
163
196
 
164
197
  async def get_current_user(response: Response, token: str = Depends(oauth2_scheme)) -> Optional[User]:
165
198
  user = self.authenticator.get_user_from_token(token)
@@ -167,11 +200,14 @@ class ApiServer:
167
200
  response.headers["Applied-Username"] = username
168
201
  return user
169
202
 
170
- # Parameters API
203
+ # Parameters API Helpers
171
204
  parameters_path = base_path + '/dataset/{dataset}/parameters'
172
205
 
206
+ def get_dataset_for_parameters_request(request: Request) -> str:
207
+ return get_dataset_from_request_path(request, -2)
208
+
173
209
  parameters_cache_size = ManifestIO.obj.settings.get(c.PARAMETERS_CACHE_SIZE_SETTING, 1024)
174
- parameters_cache_ttl = ManifestIO.obj.settings.get(c.PARAMETERS_CACHE_TTL_SETTING, 0)
210
+ parameters_cache_ttl = ManifestIO.obj.settings.get(c.PARAMETERS_CACHE_TTL_SETTING, 60)
175
211
 
176
212
  async def get_parameters_helper(
177
213
  user: Optional[User], dataset: str, selections: Iterable[tuple[str, str]], request_version: Optional[int]
@@ -187,33 +223,21 @@ class ApiServer:
187
223
  async def get_parameters_cachable(*args) -> T:
188
224
  return await do_cachable_action(params_cache, get_parameters_helper, *args)
189
225
 
190
- async def get_parameters_definition(dataset: str, user: Optional[User], headers: Mapping, params: Mapping):
226
+ async def get_parameters_definition(dataset: str, user: Optional[User], headers: Mapping, params: Mapping) -> arm.ParametersModel:
191
227
  api_function = get_parameters_helper if self.no_cache else get_parameters_cachable
192
228
  result = await apply_dataset_api_function(api_function, user, dataset, headers, params)
193
229
  return process_based_on_response_version_header(headers, {
194
- 0: result.to_json_dict0
230
+ 0: result.to_api_response_model0
195
231
  })
196
-
197
- @app.get(parameters_path, response_class=JSONResponse)
198
- async def get_parameters(dataset: str, request: Request, user: Optional[User] = Depends(get_current_user)):
199
- start = time.time()
200
- result = await get_parameters_definition(dataset, user, request.headers, request.query_params)
201
- timer.add_activity_time("GET REQUEST total time for PARAMETERS", start)
202
- return result
203
-
204
- @app.post(parameters_path, response_class=JSONResponse)
205
- async def get_parameters_with_post(dataset: str, request: Request, user: Optional[User] = Depends(get_current_user)):
206
- start = time.time()
207
- request_body = await request.json()
208
- result = await get_parameters_definition(dataset, user, request.headers, request_body)
209
- timer.add_activity_time("POST REQUEST total time for PARAMETERS", start)
210
- return result
211
232
 
212
- # Results API
233
+ # Results API Helpers
213
234
  results_path = base_path + '/dataset/{dataset}'
214
235
 
236
+ def get_dataset_for_results_request(request: Request) -> str:
237
+ return get_dataset_from_request_path(request, -1)
238
+
215
239
  results_cache_size = ManifestIO.obj.settings.get(c.RESULTS_CACHE_SIZE_SETTING, 128)
216
- results_cache_ttl = ManifestIO.obj.settings.get(c.RESULTS_CACHE_TTL_SETTING, 0)
240
+ results_cache_ttl = ManifestIO.obj.settings.get(c.RESULTS_CACHE_TTL_SETTING, 60)
217
241
 
218
242
  async def get_results_helper(
219
243
  user: Optional[User], dataset: str, selections: Iterable[tuple[str, str]], request_version: Optional[int]
@@ -224,70 +248,111 @@ class ApiServer:
224
248
 
225
249
  results_cache = TTLCache(maxsize=results_cache_size, ttl=results_cache_ttl*60)
226
250
 
227
- async def get_results_cachable(*args):
251
+ async def get_results_cachable(*args) -> pd.DataFrame:
228
252
  return await do_cachable_action(results_cache, get_results_helper, *args)
229
253
 
230
- async def get_results_definition(dataset: str, user: Optional[User], headers: Mapping, params: Mapping):
254
+ async def get_results_definition(dataset: str, user: Optional[User], headers: Mapping, params: Mapping) -> arm.DatasetResultModel:
231
255
  api_function = get_results_helper if self.no_cache else get_results_cachable
232
256
  result = await apply_dataset_api_function(api_function, user, dataset, headers, params)
233
257
  return process_based_on_response_version_header(headers, {
234
- 0: lambda: u.df_to_json0(result)
258
+ 0: lambda: df_to_api_response0(result)
235
259
  })
236
260
 
237
- @app.get(results_path, response_class=JSONResponse)
238
- async def get_results(dataset: str, request: Request, user: Optional[User] = Depends(get_current_user)):
239
- start = time.time()
240
- result = await get_results_definition(dataset, user, request.headers, request.query_params)
241
- timer.add_activity_time("GET REQUEST total time for DATASET", start)
242
- return result
261
+ param_fields = ParameterConfigsSetIO.obj.get_all_api_field_info()
243
262
 
244
- @app.post(results_path, response_class=JSONResponse)
245
- async def get_results_with_post(dataset: str, request: Request, user: Optional[User] = Depends(get_current_user)):
246
- start = time.time()
247
- request_body = await request.json()
248
- result = await get_results_definition(dataset, user, request.headers, request_body)
249
- timer.add_activity_time("POST REQUEST total time for DATASET", start)
250
- return result
263
+ # Dataset Parameters and Results APIs
264
+ for dataset_name, dataset_cfg in self.dataset_configs.items():
265
+ dataset_normalized = u.normalize_name_for_api(dataset_name)
266
+ curr_parameters_path = parameters_path.format(dataset=dataset_normalized)
267
+ curr_results_path = results_path.format(dataset=dataset_normalized)
268
+
269
+ QueryModelGet = make_dataclass("QueryParams", [
270
+ param_fields[param].as_query_info() for param in dataset_cfg.parameters
271
+ ])
272
+ AnnotatedQueryModel = Annotated[QueryModelGet, Depends()]
273
+
274
+ QueryModelPost = create_model("RequestBodyParams", **{
275
+ param: param_fields[param].as_body_info() for param in dataset_cfg.parameters
276
+ })
277
+
278
+ @app.get(curr_parameters_path, response_class=JSONResponse)
279
+ async def get_parameters(
280
+ request: Request, params: AnnotatedQueryModel, user: Optional[User] = Depends(get_current_user) # type: ignore
281
+ ) -> arm.ParametersModel:
282
+ start = time.time()
283
+ dataset = get_dataset_for_parameters_request(request)
284
+ result = await get_parameters_definition(dataset, user, request.headers, asdict(params))
285
+ timer.add_activity_time("GET REQUEST total time for PARAMETERS endpoint", start)
286
+ return result
287
+
288
+ @app.post(curr_parameters_path, response_class=JSONResponse)
289
+ async def get_parameters_with_post(
290
+ request: Request, params: QueryModelPost, user: Optional[User] = Depends(get_current_user) # type: ignore
291
+ ) -> arm.ParametersModel:
292
+ start = time.time()
293
+ dataset = get_dataset_for_parameters_request(request)
294
+ params: BaseModel = params
295
+ result = await get_parameters_definition(dataset, user, request.headers, params.model_dump())
296
+ timer.add_activity_time("POST REQUEST total time for PARAMETERS endpoint", start)
297
+ return result
298
+
299
+ @app.get(curr_results_path, response_class=JSONResponse)
300
+ async def get_results(
301
+ request: Request, params: AnnotatedQueryModel, user: Optional[User] = Depends(get_current_user) # type: ignore
302
+ ) -> arm.DatasetResultModel:
303
+ start = time.time()
304
+ dataset = get_dataset_for_results_request(request)
305
+ result = await get_results_definition(dataset, user, request.headers, asdict(params))
306
+ timer.add_activity_time("GET REQUEST total time for DATASET endpoint", start)
307
+ return result
308
+
309
+ @app.post(curr_results_path, response_class=JSONResponse)
310
+ async def get_results_with_post(
311
+ request: Request, params: QueryModelPost, user: Optional[User] = Depends(get_current_user) # type: ignore
312
+ ) -> arm.DatasetResultModel:
313
+ start = time.time()
314
+ dataset = get_dataset_for_results_request(request)
315
+ params: BaseModel = params
316
+ result = await get_results_definition(dataset, user, request.headers, params.model_dump())
317
+ timer.add_activity_time("POST REQUEST total time for DATASET endpoint", start)
318
+ return result
251
319
 
252
320
  # Datasets Catalog API
253
321
  datasets_path = base_path + '/datasets'
254
322
 
255
- def get_datasets0(user: Optional[User]):
323
+ def get_datasets0(user: Optional[User]) -> arm.DatasetsCatalogModel:
256
324
  datasets_info = []
257
325
  for dataset_name, dataset_config in self.dataset_configs.items():
258
326
  if can_user_access_dataset(user, dataset_name):
259
327
  dataset_normalized = u.normalize_name_for_api(dataset_name)
260
- datasets_info.append({
261
- 'name': dataset_name,
262
- 'label': dataset_config.label,
263
- 'parameters_path': parameters_path.format(dataset=dataset_normalized),
264
- 'result_path': results_path.format(dataset=dataset_normalized)
265
- })
266
- return {"datasets": datasets_info}
328
+ datasets_info.append(arm.DatasetInfoModel(
329
+ name=dataset_name, label=dataset_config.label,
330
+ parameters_path=parameters_path.format(dataset=dataset_normalized),
331
+ result_path=results_path.format(dataset=dataset_normalized)
332
+ ))
333
+ return arm.DatasetsCatalogModel(datasets=datasets_info)
267
334
 
268
335
  @app.get(datasets_path)
269
- def get_datasets(request: Request, user: Optional[User] = Depends(get_current_user)):
336
+ def get_datasets(request: Request, user: Optional[User] = Depends(get_current_user)) -> arm.DatasetsCatalogModel:
270
337
  return process_based_on_response_version_header(request.headers, {
271
338
  0: lambda: get_datasets0(user)
272
339
  })
273
340
 
274
341
  # Projects Catalog API
275
- def get_catalog0():
276
- return {
277
- 'projects': [{
278
- 'name': ManifestIO.obj.project_variables.get_name(),
279
- 'label': ManifestIO.obj.project_variables.get_label(),
280
- 'versions': [{
281
- 'major_version': ManifestIO.obj.project_variables.get_major_version(),
282
- 'minor_versions': [0],
283
- 'token_path': token_path,
284
- 'datasets_path': datasets_path
285
- }]
286
- }]
287
- }
342
+ def get_catalog0() -> arm.CatalogModel:
343
+ return arm.CatalogModel(projects=[arm.ProjectModel(
344
+ name=ManifestIO.obj.project_variables.get_name(),
345
+ label=ManifestIO.obj.project_variables.get_label(),
346
+ versions=[arm.ProjectVersionModel(
347
+ major_version=ManifestIO.obj.project_variables.get_major_version(),
348
+ minor_versions=[0],
349
+ token_path=token_path,
350
+ datasets_path=datasets_path
351
+ )]
352
+ )])
288
353
 
289
354
  @app.get(squirrels_version_path, response_class=JSONResponse)
290
- async def get_catalog(request: Request):
355
+ async def get_catalog(request: Request) -> arm.CatalogModel:
291
356
  return process_based_on_response_version_header(request.headers, {
292
357
  0: lambda: get_catalog0()
293
358
  })
squirrels/_authenticator.py CHANGED
@@ -1,7 +1,7 @@
1
1
  from typing import Optional
2
2
  from datetime import datetime, timedelta, timezone
3
- from jose import JWTError, jwt
4
- import secrets
3
+ from jwt.exceptions import InvalidTokenError
4
+ import secrets, jwt
5
5
 
6
6
  from . import _utils as u, _constants as c
7
7
  from .arguments.run_time_args import AuthArgs
@@ -16,7 +16,7 @@ class Authenticator:
16
16
 
17
17
  @classmethod
18
18
  def get_auth_helper(cls, default_auth_helper = None):
19
- auth_module_path = u.join_paths(c.PYCONFIG_FOLDER, c.AUTH_FILE)
19
+ auth_module_path = u.join_paths(c.PYCONFIGS_FOLDER, c.AUTH_FILE)
20
20
  return PyModule(auth_module_path, default_class=default_auth_helper)
21
21
 
22
22
  def __init__(self, token_expiry_minutes: int, auth_helper = None) -> None:
@@ -47,15 +47,17 @@ class Authenticator:
47
47
  if not isinstance(real_user, WrongPassword):
48
48
  fake_users = EnvironConfigIO.obj.get_users()
49
49
  if username in fake_users and secrets.compare_digest(fake_users[username][c.USER_PWD_KEY], password):
50
- is_internal = fake_users[username].get("is_internal", False)
50
+ fake_user = fake_users[username].copy()
51
+ fake_user.pop("username", "")
52
+ is_internal = fake_user.pop("is_internal", False)
51
53
  try:
52
- return user_cls.Create(username, fake_users[username], is_internal=is_internal)
54
+ return user_cls.Create(username, is_internal=is_internal, **fake_user)
53
55
  except Exception as e:
54
56
  raise u.FileExecutionError(f'Failed to create user from User model in {c.AUTH_FILE}', e)
55
57
 
56
58
  return None
57
59
 
58
- def create_access_token(self, user: User) -> str:
60
+ def create_access_token(self, user: User) -> tuple[str, datetime]:
59
61
  expire = datetime.now(timezone.utc) + timedelta(minutes=self.token_expiry_minutes)
60
62
  to_encode = {**vars(user), "exp": expire}
61
63
  encoded_jwt = jwt.encode(to_encode, self.secret_key, algorithm=self.algorithm)
@@ -64,11 +66,11 @@ class Authenticator:
64
66
  def get_user_from_token(self, token: Optional[str]) -> Optional[User]:
65
67
  if token is not None:
66
68
  try:
67
- payload = jwt.decode(token, self.secret_key, algorithms=[self.algorithm])
69
+ payload: dict = jwt.decode(token, self.secret_key, algorithms=[self.algorithm])
68
70
  payload.pop("exp")
69
71
  user_cls: User = self.auth_helper.get_func_or_class("User", default_attr=User)
70
72
  return user_cls._FromDict(payload)
71
- except JWTError:
73
+ except InvalidTokenError:
72
74
  return None
73
75
 
74
76
  def can_user_access_scope(self, user: Optional[User], scope: DatasetScope) -> bool:
squirrels/_command_line.py CHANGED
@@ -31,18 +31,22 @@ def main():
31
31
  module_parser = subparsers.add_parser(c.DEPS_CMD, help=f'Load all packages specified in {c.MANIFEST_FILE} (from git)', add_help=False)
32
32
  module_parser.add_argument('-h', '--help', action="help", help="Show this help message and exit")
33
33
 
34
- compile_parser = subparsers.add_parser(c.COMPILE_CMD, help='Create files for rendered sql queries in the "target/compile" folder', add_help=False)
34
+ compile_parser = subparsers.add_parser(c.COMPILE_CMD, help='Create rendered SQL files in the folder "./target/compile"', add_help=False)
35
+ compile_dataset_group = compile_parser.add_mutually_exclusive_group(required=True)
36
+ compile_test_set_group = compile_parser.add_mutually_exclusive_group(required=False)
35
37
  compile_parser.add_argument('-h', '--help', action="help", help="Show this help message and exit")
36
- compile_parser.add_argument('-d', '--dataset', type=str, help="Select dataset to use for dataset traits. If not specified, all models for all datasets are compiled")
37
- compile_parser.add_argument('-a', '--all-test-sets', action="store_true", help="Compile models for all selection test sets")
38
- compile_parser.add_argument('-t', '--test-set', type=str, help="The selection test set to use. Default selections are used if not specified. Ignored if using --all-test-sets")
39
- compile_parser.add_argument('-s', '--select', type=str, help="Select single model to compile. If not specified, all models for the dataset are compiled. Also, ignored if --dataset is not specified")
38
+
39
+ compile_dataset_group.add_argument('-d', '--dataset', type=str, help="Select dataset to use for dataset traits. Is required, unless using --all-datasets")
40
+ compile_dataset_group.add_argument('-D', '--all-datasets', action="store_true", help="Compile models for all datasets. Only required if --dataset is not specified")
41
+ compile_test_set_group.add_argument('-t', '--test-set', type=str, help="The selection test set to use. If not specified, default selections are used, unless using --all-test-sets")
42
+ compile_test_set_group.add_argument('-T', '--all-test-sets', action="store_true", help="Compile models for all selection test sets")
43
+
44
+ compile_parser.add_argument('-s', '--select', type=str, help="Select single model to compile. If not specified, all models for the dataset are compiled. Ignored if using --all-datasets")
40
45
  compile_parser.add_argument('-r', '--runquery', action='store_true', help='Runs all target models, and produce the results as csv files')
41
46
 
42
- run_parser = subparsers.add_parser(c.RUN_CMD, help='Run the builtin API server', add_help=False)
47
+ run_parser = subparsers.add_parser(c.RUN_CMD, help='Run the API server', add_help=False)
43
48
  run_parser.add_argument('-h', '--help', action="help", help="Show this help message and exit")
44
49
  run_parser.add_argument('--no-cache', action='store_true', help='Do not cache any api results')
45
- run_parser.add_argument('--debug', action='store_true', help='Show all "hidden parameters" in the parameters response')
46
50
  run_parser.add_argument('--host', type=str, default='127.0.0.1', help="The host to run on")
47
51
  run_parser.add_argument('--port', type=int, default=4465, help="The port to run on")
48
52
 
@@ -58,6 +62,7 @@ def main():
58
62
  from ._package_loader import PackageLoaderIO
59
63
  from ._connection_set import ConnectionSetIO
60
64
  from ._parameter_sets import ParameterConfigsSetIO
65
+ from ._seeds import SeedsIO
61
66
 
62
67
  if args.version:
63
68
  print(__version__)
@@ -68,18 +73,19 @@ def main():
68
73
  PackageLoaderIO.LoadPackages(reload=True)
69
74
  elif args.command in [c.RUN_CMD, c.COMPILE_CMD]:
70
75
  ManifestIO.LoadFromFile()
76
+ SeedsIO.LoadFiles()
71
77
  ConnectionSetIO.LoadFromFile()
72
78
  try:
73
79
  ParameterConfigsSetIO.LoadFromFile()
74
80
  ModelsIO.LoadFiles()
75
-
76
81
  if args.command == c.RUN_CMD:
77
- server = ApiServer(args.no_cache, args.debug)
82
+ server = ApiServer(args.no_cache)
78
83
  server.run(args)
79
- pass
80
84
  elif args.command == c.COMPILE_CMD:
81
- task = ModelsIO.WriteOutputs(args.dataset, args.select, args.all_test_sets, args.test_set, args.runquery)
85
+ task = ModelsIO.WriteOutputs(args.dataset, args.all_datasets, args.select, args.test_set, args.all_test_sets, args.runquery)
82
86
  asyncio.run(task)
87
+ except KeyboardInterrupt:
88
+ pass
83
89
  finally:
84
90
  ConnectionSetIO.Dispose()
85
91
  elif args.command is None:
squirrels/_connection_set.py CHANGED
@@ -29,9 +29,9 @@ class ConnectionSet:
29
29
  raise u.ConfigurationError(f'Connection name "{conn_name}" was not configured') from e
30
30
  return connection_pool
31
31
 
32
- def run_sql_query_from_conn_name(self, query: str, conn_name: str) -> pd.DataFrame:
32
+ def run_sql_query_from_conn_name(self, query: str, conn_name: str, placeholders: dict = {}) -> pd.DataFrame:
33
33
  engine = self._get_engine(conn_name)
34
- df = pd.read_sql(query, engine)
34
+ df = pd.read_sql(query, engine, params=placeholders)
35
35
  return df
36
36
 
37
37
  def _dispose(self) -> None:
squirrels/_constants.py CHANGED
@@ -39,6 +39,7 @@ PARAMETER_ARGS_KEY = 'arguments'
39
39
  TEST_SETS_KEY = 'selection_test_sets'
40
40
  TEST_SET_NAME_KEY = 'name'
41
41
  DEFAULT_TEST_SET_NAME = 'default'
42
+ TEST_SET_DATASETS_KEY = 'datasets'
42
43
  TEST_SET_USER_ATTR_KEY = 'user_attributes'
43
44
  TEST_SET_PARAMETERS_KEY = 'parameters'
44
45
 
@@ -48,6 +49,7 @@ DATASET_LABEL_KEY = 'label'
48
49
  DATASET_MODEL_KEY = 'model'
49
50
  DATASET_PARAMETERS_KEY = 'parameters'
50
51
  DATASET_TRAITS_KEY = 'traits'
52
+ DATASET_DEFAULT_TEST_SET_KEY = 'default_test_set'
51
53
 
52
54
  DATASET_SCOPE_KEY = 'scope'
53
55
  PUBLIC_SCOPE = 'public'
@@ -94,7 +96,7 @@ FEDERATES_FOLDER = 'federates'
94
96
  FEDERATE_SQL_NAME = 'dataset_example.sql'
95
97
  FEDERATE_PY_NAME = 'dataset_example.py'
96
98
 
97
- PYCONFIG_FOLDER = 'pyconfigs'
99
+ PYCONFIGS_FOLDER = 'pyconfigs'
98
100
  AUTH_FILE = 'auth.py'
99
101
  CONNECTIONS_FILE = 'connections.py'
100
102
  CONTEXT_FILE = 'context.py'
@@ -103,9 +105,9 @@ PARAMETERS_FILE = 'parameters.py'
103
105
  TARGET_FOLDER = 'target'
104
106
  COMPILE_FOLDER = 'compile'
105
107
 
106
- OUTPUTS_FOLDER = 'outputs'
107
- PARAMETERS_OUTPUT = 'parameters.json'
108
- FINAL_VIEW_OUT_STEM = 'final_view'
108
+ SEEDS_FOLDER = 'seeds'
109
+ CATEGORY_SEED_FILE = 'seed_categories.csv'
110
+ SUBCATEGORY_SEED_FILE = 'seed_subcategories.csv'
109
111
 
110
112
  # Dataset setting names
111
113
  AUTH_TOKEN_EXPIRE_SETTING = 'auth.token.expire_minutes'
@@ -116,6 +118,8 @@ RESULTS_CACHE_TTL_SETTING = 'results.cache.ttl_minutes'
116
118
  TEST_SET_DEFAULT_USED_SETTING = 'selection_test_sets.default_name_used'
117
119
  DB_CONN_DEFAULT_USED_SETTING = 'connections.default_name_used'
118
120
  DEFAULT_MATERIALIZE_SETTING = 'defaults.federates.materialized'
121
+ SEEDS_INFER_SCHEMA_SETTING = 'seeds.infer_schema'
122
+ SEEDS_NA_VALUES_SETTING = 'seeds.na_values'
119
123
  IN_MEMORY_DB_SETTING = 'in_memory_database'
120
124
  SQLITE = 'sqlite'
121
125
  DUCKDB = 'duckdb'
@@ -138,9 +142,13 @@ CONF_FORMAT_CHOICES2 = [(PYTHON_FORMAT2, PYTHON_FORMAT), YML_FORMAT]
138
142
 
139
143
  EXPENSES_DB_NAME = 'expenses'
140
144
  WEATHER_DB_NAME = 'weather'
141
- DATABASE_CHOICES = [EXPENSES_DB_NAME, WEATHER_DB_NAME]
145
+ NO_DB = 'none'
146
+ DATABASE_CHOICES = [EXPENSES_DB_NAME, WEATHER_DB_NAME, NO_DB]
142
147
 
143
148
  # Function names
144
149
  GET_USER_FUNC = "get_user_if_valid"
145
150
  DEP_FUNC = "dependencies"
146
151
  MAIN_FUNC = "main"
152
+
153
+ # Regex
154
+ date_regex = r'^\d{4}\-\d{2}\-\d{2}$'