squirrels 0.5.0b3__py3-none-any.whl → 0.6.0.post0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- squirrels/__init__.py +4 -0
- squirrels/_api_routes/__init__.py +5 -0
- squirrels/_api_routes/auth.py +337 -0
- squirrels/_api_routes/base.py +196 -0
- squirrels/_api_routes/dashboards.py +156 -0
- squirrels/_api_routes/data_management.py +148 -0
- squirrels/_api_routes/datasets.py +220 -0
- squirrels/_api_routes/project.py +289 -0
- squirrels/_api_server.py +440 -792
- squirrels/_arguments/__init__.py +0 -0
- squirrels/_arguments/{_init_time_args.py → init_time_args.py} +23 -43
- squirrels/_arguments/{_run_time_args.py → run_time_args.py} +32 -68
- squirrels/_auth.py +590 -264
- squirrels/_command_line.py +130 -58
- squirrels/_compile_prompts.py +147 -0
- squirrels/_connection_set.py +16 -15
- squirrels/_constants.py +36 -11
- squirrels/_dashboards.py +179 -0
- squirrels/_data_sources.py +40 -34
- squirrels/_dataset_types.py +16 -11
- squirrels/_env_vars.py +209 -0
- squirrels/_exceptions.py +9 -37
- squirrels/_http_error_responses.py +52 -0
- squirrels/_initializer.py +7 -6
- squirrels/_logging.py +121 -0
- squirrels/_manifest.py +155 -77
- squirrels/_mcp_server.py +578 -0
- squirrels/_model_builder.py +11 -55
- squirrels/_model_configs.py +5 -5
- squirrels/_model_queries.py +1 -1
- squirrels/_models.py +276 -143
- squirrels/_package_data/base_project/.env +1 -24
- squirrels/_package_data/base_project/.env.example +31 -17
- squirrels/_package_data/base_project/connections.yml +4 -3
- squirrels/_package_data/base_project/dashboards/dashboard_example.py +13 -7
- squirrels/_package_data/base_project/dashboards/dashboard_example.yml +6 -6
- squirrels/_package_data/base_project/docker/Dockerfile +2 -2
- squirrels/_package_data/base_project/docker/compose.yml +1 -1
- squirrels/_package_data/base_project/duckdb_init.sql +1 -0
- squirrels/_package_data/base_project/models/builds/build_example.py +2 -2
- squirrels/_package_data/base_project/models/dbviews/dbview_example.sql +7 -2
- squirrels/_package_data/base_project/models/dbviews/dbview_example.yml +16 -10
- squirrels/_package_data/base_project/models/federates/federate_example.py +27 -17
- squirrels/_package_data/base_project/models/federates/federate_example.sql +3 -7
- squirrels/_package_data/base_project/models/federates/federate_example.yml +7 -7
- squirrels/_package_data/base_project/models/sources.yml +5 -6
- squirrels/_package_data/base_project/parameters.yml +24 -38
- squirrels/_package_data/base_project/pyconfigs/connections.py +8 -3
- squirrels/_package_data/base_project/pyconfigs/context.py +26 -14
- squirrels/_package_data/base_project/pyconfigs/parameters.py +124 -81
- squirrels/_package_data/base_project/pyconfigs/user.py +48 -15
- squirrels/_package_data/base_project/resources/public/.gitkeep +0 -0
- squirrels/_package_data/base_project/seeds/seed_categories.yml +1 -1
- squirrels/_package_data/base_project/seeds/seed_subcategories.yml +1 -1
- squirrels/_package_data/base_project/squirrels.yml.j2 +21 -31
- squirrels/_package_data/templates/login_successful.html +53 -0
- squirrels/_package_data/templates/squirrels_studio.html +22 -0
- squirrels/_parameter_configs.py +43 -22
- squirrels/_parameter_options.py +1 -1
- squirrels/_parameter_sets.py +41 -30
- squirrels/_parameters.py +560 -123
- squirrels/_project.py +487 -277
- squirrels/_py_module.py +71 -10
- squirrels/_request_context.py +33 -0
- squirrels/_schemas/__init__.py +0 -0
- squirrels/_schemas/auth_models.py +83 -0
- squirrels/_schemas/query_param_models.py +70 -0
- squirrels/_schemas/request_models.py +26 -0
- squirrels/_schemas/response_models.py +286 -0
- squirrels/_seeds.py +52 -13
- squirrels/_sources.py +29 -23
- squirrels/_utils.py +221 -42
- squirrels/_version.py +1 -3
- squirrels/arguments.py +7 -2
- squirrels/auth.py +4 -0
- squirrels/connections.py +2 -0
- squirrels/dashboards.py +3 -1
- squirrels/data_sources.py +6 -0
- squirrels/parameter_options.py +5 -0
- squirrels/parameters.py +5 -0
- squirrels/types.py +10 -3
- squirrels-0.6.0.post0.dist-info/METADATA +148 -0
- squirrels-0.6.0.post0.dist-info/RECORD +101 -0
- {squirrels-0.5.0b3.dist-info → squirrels-0.6.0.post0.dist-info}/WHEEL +1 -1
- squirrels/_api_response_models.py +0 -190
- squirrels/_dashboard_types.py +0 -82
- squirrels/_dashboards_io.py +0 -79
- squirrels-0.5.0b3.dist-info/METADATA +0 -110
- squirrels-0.5.0b3.dist-info/RECORD +0 -80
- /squirrels/_package_data/base_project/{assets → resources}/expenses.db +0 -0
- /squirrels/_package_data/base_project/{assets → resources}/weather.db +0 -0
- {squirrels-0.5.0b3.dist-info → squirrels-0.6.0.post0.dist-info}/entry_points.txt +0 -0
- {squirrels-0.5.0b3.dist-info → squirrels-0.6.0.post0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Dashboard routes for parameters and results
|
|
3
|
+
"""
|
|
4
|
+
from typing import Callable, Coroutine, Any
|
|
5
|
+
from fastapi import FastAPI, Depends, Request
|
|
6
|
+
from fastapi.responses import Response, HTMLResponse
|
|
7
|
+
from fastapi.security import HTTPBearer
|
|
8
|
+
from dataclasses import asdict
|
|
9
|
+
from cachetools import TTLCache
|
|
10
|
+
import time
|
|
11
|
+
|
|
12
|
+
from .. import _constants as c, _utils as u
|
|
13
|
+
from .._schemas import response_models as rm
|
|
14
|
+
from .._exceptions import ConfigurationError
|
|
15
|
+
from .._dashboards import Dashboard
|
|
16
|
+
from .._schemas.query_param_models import get_query_models_for_parameters, get_query_models_for_dashboard
|
|
17
|
+
from .._schemas.auth_models import AbstractUser
|
|
18
|
+
from .base import RouteBase
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class DashboardRoutes(RouteBase):
    """Dashboard parameter and result routes.

    For every dashboard known to the project, ``setup_routes`` registers GET
    and POST endpoints for the dashboard's parameters and for its rendered
    result (PNG or HTML). Rendered results go through a TTL cache unless the
    instance was created with ``no_cache=True``.
    """

    def __init__(self, get_bearer_token: HTTPBearer, project, no_cache: bool = False):
        """Initialise the route group and the dashboard results cache.

        Args:
            get_bearer_token: Bearer-token dependency, forwarded to ``RouteBase``.
            project: Project object, forwarded to ``RouteBase``.
            no_cache: Forwarded to ``RouteBase``; when true the results cache
                is bypassed (see ``_get_dashboard_results_definition``).
        """
        super().__init__(get_bearer_token, project, no_cache)

        # The TTL is configured in minutes; TTLCache expects seconds.
        self.dashboard_results_cache = TTLCache(
            maxsize=self.env_vars.dashboards_cache_size,
            ttl=self.env_vars.dashboards_cache_ttl_minutes * 60,
        )

    async def _get_dashboard_results_helper(
        self, dashboard: str, user: AbstractUser, selections: tuple[tuple[str, Any], ...], configurables: tuple[tuple[str, str], ...]
    ) -> Dashboard:
        """Render a dashboard without consulting the cache.

        ``selections`` and ``configurables`` arrive as tuples of pairs and are
        converted back to dicts here. Configurables whose key is not declared
        in the manifest are dropped.
        """
        known_configurables = self.manifest_cfg.configurables
        cfg_filtered = {key: value for key, value in dict(configurables).items() if key in known_configurables}
        return await self.project.dashboard(
            dashboard, user=user, selections=dict(selections), configurables=cfg_filtered
        )

    async def _get_dashboard_results_cachable(
        self, dashboard: str, user: AbstractUser, selections: tuple[tuple[str, Any], ...], configurables: tuple[tuple[str, str], ...]
    ) -> Dashboard:
        """Render a dashboard through ``do_cachable_action`` using the dashboard results cache."""
        return await self.do_cachable_action(
            self.dashboard_results_cache, self._get_dashboard_results_helper,
            dashboard, user, selections, configurables
        )

    async def _get_dashboard_results_definition(
        self, dashboard_name: str, user: AbstractUser, params: dict, headers: dict[str, str]
    ) -> Response:
        """Render a dashboard and wrap it in the matching HTTP response.

        Args:
            dashboard_name: Name of the dashboard to render.
            user: The requesting user.
            params: Request parameters (query string or body) as a dict.
            headers: Request headers; configurables are only read from them
                when the user has elevated privileges.

        Returns:
            A ``image/png`` response for PNG dashboards or an ``HTMLResponse``
            for HTML dashboards.

        Raises:
            TypeError: If a PNG dashboard did not produce ``bytes`` content.
            NotImplementedError: If the dashboard format is neither PNG nor HTML.
        """
        render = self._get_dashboard_results_helper if self.no_cache else self._get_dashboard_results_cachable
        selections = self.get_selections_as_immutable(params, uncached_keys=set())

        # Only users with elevated privileges may override configurables via headers.
        is_elevated = u.user_has_elevated_privileges(user.access_level, self.env_vars.elevated_access_level)
        configurables = self.get_configurables_from_headers(headers) if is_elevated else tuple()
        dashboard_obj = await render(dashboard_name, user, selections, configurables)

        if dashboard_obj._format == c.PNG:
            content = dashboard_obj._content
            # Explicit check instead of ``assert`` so that it still runs under ``python -O``.
            if not isinstance(content, bytes):
                raise TypeError(
                    f"Expected bytes content for PNG dashboard '{dashboard_name}', got {type(content).__name__}"
                )
            return Response(content, media_type="image/png")

        if dashboard_obj._format == c.HTML:
            return HTMLResponse(dashboard_obj._content)

        raise NotImplementedError(
            f"Dashboard format '{dashboard_obj._format}' is not supported for dashboard '{dashboard_name}'"
        )

    def setup_routes(
        self, app: FastAPI, param_fields: dict,
        get_parameters_definition: Callable[..., Coroutine[Any, Any, rm.ParametersModel]]
    ) -> None:
        """Register the parameters and results endpoints of every dashboard.

        Args:
            app: The FastAPI application the routes are added to.
            param_fields: Mapping keyed by parameter name; used to validate the
                parameters each dashboard references and to build the
                per-dashboard query models.
            get_parameters_definition: Coroutine function that produces the
                ``ParametersModel`` for a parameters request.

        Raises:
            ConfigurationError: If a dashboard references a parameter that is
                not a key of ``param_fields``.
        """

        dashboard_results_path = '/dashboards/{dashboard}'
        dashboard_parameters_path = dashboard_results_path + '/parameters'

        def validate_parameters_list(parameters: list[str] | None, entity_type: str, dashboard_name: str) -> None:
            """Fail fast if any listed parameter is unknown; ``None`` means nothing to check."""
            if parameters is None:
                return
            for param in parameters:
                if param not in param_fields:
                    all_params = list(param_fields.keys())
                    raise ConfigurationError(
                        f"{entity_type} '{dashboard_name}' use parameter '{param}' which doesn't exist. Available parameters are:"
                        f"\n {all_params}"
                    )

        # Dashboard parameters and results APIs
        for dashboard_name, dashboard in self.project._dashboards.items():
            dashboard_name_for_api = u.normalize_name_for_api(dashboard_name)
            curr_parameters_path = dashboard_parameters_path.format(dashboard=dashboard_name_for_api)
            curr_results_path = dashboard_results_path.format(dashboard=dashboard_name_for_api)

            validate_parameters_list(dashboard.config.parameters, "Dashboard", dashboard_name)

            QueryModelForGetParams, QueryModelForPostParams = get_query_models_for_parameters(param_fields, dashboard.config.parameters)
            QueryModelForGetDash, QueryModelForPostDash = get_query_models_for_dashboard(param_fields, dashboard.config.parameters)

            # NOTE(review): the handlers below resolve the dashboard name from the request
            # path instead of using the loop variable - presumably to avoid late binding
            # of ``dashboard_name`` in these closures.

            @app.get(curr_parameters_path, tags=[f"Dashboard '{dashboard_name}'"], description=self._parameters_description)
            async def get_dashboard_parameters(
                request: Request, params: QueryModelForGetParams, user=Depends(self.get_current_user)
            ) -> rm.ParametersModel:
                start = time.time()
                curr_dashboard_name = self.get_name_from_path_section(request, -2)
                dashboard_config = self.project._dashboards[curr_dashboard_name].config
                result = await get_parameters_definition(
                    dashboard_config.parameters, "dashboard", curr_dashboard_name, dashboard_config.scope, user, asdict(params)
                )
                self.logger.log_activity_time(
                    "GET REQUEST for PARAMETERS", start, additional_data={"dashboard_name": curr_dashboard_name}
                )
                return result

            @app.post(curr_parameters_path, tags=[f"Dashboard '{dashboard_name}'"], description=self._parameters_description)
            async def get_dashboard_parameters_with_post(
                request: Request, params: QueryModelForPostParams, user=Depends(self.get_current_user)
            ) -> rm.ParametersModel:
                start = time.time()
                curr_dashboard_name = self.get_name_from_path_section(request, -2)
                dashboard_config = self.project._dashboards[curr_dashboard_name].config
                result = await get_parameters_definition(
                    dashboard_config.parameters, "dashboard", curr_dashboard_name, dashboard_config.scope, user, params.model_dump()
                )
                self.logger.log_activity_time(
                    "POST REQUEST for PARAMETERS", start, additional_data={"dashboard_name": curr_dashboard_name}
                )
                return result

            # ``response_class=Response`` matches the POST results route below: the handler
            # returns a ready-made PNG/HTML response, not JSON.
            @app.get(curr_results_path, tags=[f"Dashboard '{dashboard_name}'"], description=dashboard.config.description, response_class=Response)
            async def get_dashboard_results(
                request: Request, params: QueryModelForGetDash, user=Depends(self.get_current_user)
            ) -> Response:
                start = time.time()
                curr_dashboard_name = self.get_name_from_path_section(request, -1)
                result = await self._get_dashboard_results_definition(
                    curr_dashboard_name, user, asdict(params), headers=dict(request.headers)
                )
                self.logger.log_activity_time(
                    "GET REQUEST for DASHBOARD RESULTS", start, additional_data={"dashboard_name": curr_dashboard_name}
                )
                return result

            @app.post(curr_results_path, tags=[f"Dashboard '{dashboard_name}'"], description=dashboard.config.description, response_class=Response)
            async def get_dashboard_results_with_post(
                request: Request, params: QueryModelForPostDash, user=Depends(self.get_current_user)
            ) -> Response:
                start = time.time()
                curr_dashboard_name = self.get_name_from_path_section(request, -1)
                result = await self._get_dashboard_results_definition(
                    curr_dashboard_name, user, params.model_dump(), headers=dict(request.headers)
                )
                self.logger.log_activity_time(
                    "POST REQUEST for DASHBOARD RESULTS", start, additional_data={"dashboard_name": curr_dashboard_name}
                )
                return result
|
|
156
|
+
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Data management routes for build and query models
|
|
3
|
+
"""
|
|
4
|
+
from typing import Any, Annotated
|
|
5
|
+
from fastapi import FastAPI, Depends, Request, Response, status, Path
|
|
6
|
+
from fastapi.responses import JSONResponse
|
|
7
|
+
from fastapi.security import HTTPBearer
|
|
8
|
+
from dataclasses import asdict
|
|
9
|
+
from cachetools import TTLCache
|
|
10
|
+
import time
|
|
11
|
+
|
|
12
|
+
from .. import _constants as c, _utils as u
|
|
13
|
+
from .._schemas import response_models as rm
|
|
14
|
+
from .._exceptions import InvalidInputError
|
|
15
|
+
from .._schemas.auth_models import AbstractUser
|
|
16
|
+
from .._dataset_types import DatasetResult
|
|
17
|
+
from .._schemas.query_param_models import get_query_models_for_querying_models, get_query_models_for_compiled_models
|
|
18
|
+
from .base import RouteBase
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class DataManagementRoutes(RouteBase):
    """Data management routes for build and query operations.

    ``setup_routes`` registers three groups of endpoints: ``/build``,
    ``/query-result`` (GET/POST) and ``/compiled-models/{model_name}``
    (GET/POST). All of them require a user with elevated privileges.
    """

    def __init__(self, get_bearer_token: HTTPBearer, project, no_cache: bool = False):
        """Initialise the route group and the cache for model query results.

        Args:
            get_bearer_token: Bearer-token dependency, forwarded to ``RouteBase``.
            project: Project object, forwarded to ``RouteBase``.
            no_cache: Forwarded to ``RouteBase``; when true the query cache
                is bypassed (see ``_query_models_definition``).
        """
        super().__init__(get_bearer_token, project, no_cache)

        # Setup cache (same settings as dataset results cache).
        # The TTL is configured in minutes; TTLCache expects seconds.
        self.query_models_cache = TTLCache(
            maxsize=self.env_vars.datasets_cache_size,
            ttl=self.env_vars.datasets_cache_ttl_minutes * 60,
        )

    async def _query_models_helper(
        self, sql_query: str, user: AbstractUser, selections: tuple[tuple[str, Any], ...], configurables: tuple[tuple[str, str], ...]
    ) -> DatasetResult:
        """Run a SQL query against the project's models without consulting the cache.

        ``selections`` and ``configurables`` arrive as tuples of pairs and are
        converted back to dicts here. Configurables whose key is not declared
        in the manifest are dropped.
        """
        known_configurables = self.manifest_cfg.configurables
        cfg_filtered = {key: value for key, value in dict(configurables).items() if key in known_configurables}
        return await self.project.query_models(
            sql_query, user=user, selections=dict(selections), configurables=cfg_filtered
        )

    async def _query_models_cachable(
        self, sql_query: str, user: AbstractUser, selections: tuple[tuple[str, Any], ...], configurables: tuple[tuple[str, str], ...]
    ) -> DatasetResult:
        """Run a model query through ``do_cachable_action`` using the query models cache."""
        return await self.do_cachable_action(
            self.query_models_cache, self._query_models_helper,
            sql_query, user, selections, configurables
        )

    async def _query_models_definition(
        self, user: AbstractUser, params: dict, *, headers: dict[str, str]
    ) -> rm.DatasetResultModel:
        """Validate a query-models request, run it, and format the result.

        Args:
            user: The requesting user; must have elevated privileges.
            params: Request parameters as a dict. ``x_sql_query`` holds the SQL
                text to run.
            headers: Request headers, from which configurables are read.

        Raises:
            InvalidInputError: 403 if the user lacks elevated privileges, 400
                if no (or only a blank) SQL query was supplied.
        """
        if not u.user_has_elevated_privileges(user.access_level, self.env_vars.elevated_access_level):
            raise InvalidInputError(403, "unauthorized_access_to_query_models", f"User '{user}' does not have permission to query data models")

        sql_query = params.get("x_sql_query")
        # A blank query is as unusable as a missing one, so reject both up front
        # with the same client error instead of sending it to the query engine.
        if sql_query is None or not str(sql_query).strip():
            raise InvalidInputError(400, "sql_query_required", "SQL query must be provided")

        run_query = self._query_models_helper if self.no_cache else self._query_models_cachable
        # These keys are handed to get_selections_as_immutable as ``uncached_keys``;
        # the SQL text itself is passed to the query function separately.
        uncached_keys = {"x_sql_query", "x_orientation", "x_offset", "x_limit"}
        selections = self.get_selections_as_immutable(params, uncached_keys)
        configurables = self.get_configurables_from_headers(headers)
        result = await run_query(sql_query, user, selections, configurables)

        result_format = self.extract_orientation_offset_and_limit(params)
        return rm.DatasetResultModel(**result.to_json(result_format))

    async def _get_compiled_model_definition(
        self, model_name: str, user: AbstractUser, params: dict, *, headers: dict[str, str]
    ) -> rm.CompiledQueryModel:
        """Return the compiled query of a model for the given selections.

        Args:
            model_name: Model name as given in the URL; it is normalised with
                ``u.normalize_name`` before lookup.
            user: The requesting user; must have elevated privileges.
            params: Request parameters as a dict.
            headers: Request headers, from which configurables are read.

        Raises:
            InvalidInputError: 403 if the user lacks elevated privileges.
        """
        normalized_model_name = u.normalize_name(model_name)

        # Internal users only
        if not u.user_has_elevated_privileges(user.access_level, self.env_vars.elevated_access_level):
            raise InvalidInputError(403, "unauthorized_access_to_compile_model", f"User '{user}' does not have permission to fetch compiled SQL")

        selections = self.get_selections_as_immutable(params, uncached_keys=set())
        configurables = self.get_configurables_from_headers(headers)
        # Only pass configurables that are declared in the manifest.
        known_configurables = self.manifest_cfg.configurables
        cfg_filtered = {key: value for key, value in dict(configurables).items() if key in known_configurables}
        return await self.project.get_compiled_model_query(
            normalized_model_name, user=user, selections=dict(selections), configurables=cfg_filtered
        )

    def setup_routes(self, app: FastAPI, param_fields: dict) -> None:
        """Register the build, query-result and compiled-model endpoints.

        Args:
            app: The FastAPI application the routes are added to.
            param_fields: Parameter field definitions used to build the query
                models of the query-result and compiled-model endpoints.
        """

        # Build project endpoint
        build_path = '/build'

        @app.post(build_path, tags=["Data Management"], summary="Build or update the Virtual Data Lake (VDL) for the project")
        async def build(user=Depends(self.get_current_user)) -> Response:
            if not u.user_has_elevated_privileges(user.access_level, self.env_vars.elevated_access_level):
                raise InvalidInputError(403, "unauthorized_access_to_build_model", f"User '{user}' does not have permission to build the virtual data lake (VDL)")
            await self.project.build()
            return Response(status_code=status.HTTP_200_OK)

        # Query result endpoints
        query_models_path = '/query-result'
        QueryModelForQueryModels, QueryModelForPostQueryModels = get_query_models_for_querying_models(param_fields)

        @app.get(query_models_path, tags=["Data Management"], response_class=JSONResponse)
        async def query_models(
            request: Request, params: QueryModelForQueryModels, user=Depends(self.get_current_user)
        ) -> rm.DatasetResultModel:
            start = time.time()
            result = await self._query_models_definition(user, asdict(params), headers=dict(request.headers))
            self.logger.log_activity_time("GET REQUEST for QUERY MODELS", start)
            return result

        @app.post(query_models_path, tags=["Data Management"], response_class=JSONResponse)
        async def query_models_with_post(
            request: Request, params: QueryModelForPostQueryModels, user=Depends(self.get_current_user)
        ) -> rm.DatasetResultModel:
            start = time.time()
            result = await self._query_models_definition(user, params.model_dump(), headers=dict(request.headers))
            self.logger.log_activity_time("POST REQUEST for QUERY MODELS", start)
            return result

        # Compiled models endpoints
        compiled_models_path = '/compiled-models/{model_name}'
        QueryModelForGetCompiled, QueryModelForPostCompiled = get_query_models_for_compiled_models(param_fields)

        @app.get(compiled_models_path, tags=["Data Management"], response_class=JSONResponse, summary="Get compiled definition for a model")
        async def get_compiled_model(
            request: Request,
            model_name: Annotated[str, Path(description="The name of the model. Both snake case (with underscores) and kebab case (with dashes) are supported")],
            params: QueryModelForGetCompiled,
            user=Depends(self.get_current_user)
        ) -> rm.CompiledQueryModel:
            start = time.time()
            result = await self._get_compiled_model_definition(model_name, user, asdict(params), headers=dict(request.headers))
            self.logger.log_activity_time(
                "GET REQUEST for GET COMPILED MODEL", start, additional_data={"model_name": model_name}
            )
            return result

        @app.post(compiled_models_path, tags=["Data Management"], response_class=JSONResponse, summary="Get compiled definition for a model")
        async def get_compiled_model_with_post(
            request: Request,
            model_name: Annotated[str, Path(description="The name of the model. Both snake case (with underscores) and kebab case (with dashes) are supported")],
            params: QueryModelForPostCompiled,
            user=Depends(self.get_current_user)
        ) -> rm.CompiledQueryModel:
            start = time.time()
            result = await self._get_compiled_model_definition(model_name, user, params.model_dump(), headers=dict(request.headers))
            self.logger.log_activity_time(
                "POST REQUEST for GET COMPILED MODEL", start, additional_data={"model_name": model_name}
            )
            return result
|
|
148
|
+
|
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Dataset routes for parameters and results
|
|
3
|
+
"""
|
|
4
|
+
from typing import Callable, Coroutine, Any
|
|
5
|
+
from fastapi import FastAPI, Depends, Request
|
|
6
|
+
from fastapi.responses import JSONResponse
|
|
7
|
+
from fastapi.security import HTTPBearer
|
|
8
|
+
from dataclasses import asdict
|
|
9
|
+
from cachetools import TTLCache
|
|
10
|
+
|
|
11
|
+
import time
|
|
12
|
+
import polars as pl
|
|
13
|
+
|
|
14
|
+
from .. import _utils as u
|
|
15
|
+
from .._schemas import response_models as rm
|
|
16
|
+
from .._exceptions import ConfigurationError, InvalidInputError
|
|
17
|
+
from .._dataset_types import DatasetResult
|
|
18
|
+
from .._schemas.query_param_models import get_query_models_for_parameters, get_query_models_for_dataset
|
|
19
|
+
from .._schemas.auth_models import AbstractUser
|
|
20
|
+
from .base import RouteBase
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class DatasetRoutes(RouteBase):
    """Dataset parameter and result routes.

    For every dataset in the manifest, ``setup_routes`` registers GET and POST
    endpoints for the dataset's parameters and for its results. It also
    stores two coroutine functions on the instance for use by the MCP server
    builder.
    """

    def __init__(self, get_bearer_token: HTTPBearer, project, no_cache: bool = False):
        """Initialise the route group, its results cache and its limits.

        Args:
            get_bearer_token: Bearer-token dependency, forwarded to ``RouteBase``.
            project: Project object, forwarded to ``RouteBase``.
            no_cache: Forwarded to ``RouteBase``; when true the results cache
                is bypassed (see ``_get_dataset_result_object``).
        """
        super().__init__(get_bearer_token, project, no_cache)

        # Setup caches. The TTL is configured in minutes; TTLCache expects seconds.
        self.dataset_results_cache = TTLCache(
            maxsize=self.env_vars.datasets_cache_size,
            ttl=self.env_vars.datasets_cache_ttl_minutes * 60,
        )

        # Setup max rows
        self.max_result_rows = self.env_vars.datasets_max_rows_output

        # Setup SQL query timeout
        self.sql_timeout_seconds = self.env_vars.datasets_sql_timeout_seconds

    async def _get_dataset_results_helper(
        self, dataset: str, user: AbstractUser, selections: tuple[tuple[str, Any], ...], configurables: tuple[tuple[str, str], ...]
    ) -> DatasetResult:
        """Compute a dataset result without consulting the cache.

        ``selections`` and ``configurables`` arrive as tuples of pairs and are
        converted back to dicts here.
        """
        # Only pass configurables that are defined in manifest
        known_configurables = self.manifest_cfg.configurables
        cfg_filtered = {key: value for key, value in dict(configurables).items() if key in known_configurables}
        return await self.project._dataset_result(
            dataset, user=user, selections=dict(selections), configurables=cfg_filtered
        )

    async def _get_dataset_results_cachable(
        self, dataset: str, user: AbstractUser, selections: tuple[tuple[str, Any], ...], configurables: tuple[tuple[str, str], ...]
    ) -> DatasetResult:
        """Compute a dataset result through ``do_cachable_action`` using the dataset results cache."""
        return await self.do_cachable_action(
            self.dataset_results_cache, self._get_dataset_results_helper, dataset, user, selections, configurables
        )

    async def _get_dataset_result_object(
        self, dataset_name: str, user: AbstractUser, params: dict, configurables: tuple[tuple[str, str], ...]
    ) -> DatasetResult:
        """Get a dataset result, optionally post-processed by a Polars SQL query.

        Args:
            dataset_name: Name of the dataset to compute.
            user: The requesting user.
            params: Request parameters as a dict. A truthy ``x_sql_query``
                entry is run as Polars SQL against the dataset result, which
                is exposed to the query as table ``result``.
            configurables: Configurable overrides; ignored unless the user
                has elevated privileges.

        Returns:
            The dataset result, or a new ``DatasetResult`` wrapping the
            transformed frame when ``x_sql_query`` was supplied.

        Raises:
            InvalidInputError: 400 if the SQL transformation fails, 413 if the
                transformed result has more than ``max_result_rows`` rows.
        """
        fetch_result = self._get_dataset_results_helper if self.no_cache else self._get_dataset_results_cachable
        # These keys are handed to get_selections_as_immutable as ``uncached_keys``;
        # the SQL transformation below is applied after the (possibly cached) fetch.
        uncached_keys = {"x_sql_query", "x_orientation", "x_offset", "x_limit"}
        selections = self.get_selections_as_immutable(params, uncached_keys)

        # Only users with elevated privileges may override configurables.
        is_elevated = u.user_has_elevated_privileges(user.access_level, self.env_vars.elevated_access_level)
        configurables_if_elevated: tuple[tuple[str, str], ...] = configurables if is_elevated else tuple()
        result = await fetch_result(dataset_name, user, selections, configurables_if_elevated)

        # Apply optional final SQL transformation before select/limit/offset
        sql_query = params.get("x_sql_query")
        if not sql_query:
            return result

        try:
            # NOTE(review): one row more than the limit is requested - presumably so that
            # the overflow check below can tell "exactly at the limit" from "over it".
            transformed = await u.run_polars_sql_on_dataframes(
                sql_query, {"result": result.df.lazy()},
                timeout_seconds=self.sql_timeout_seconds, max_rows=self.max_result_rows + 1
            )
        except Exception as e:
            raise InvalidInputError(400, "invalid_sql_query", "Failed to run provided Polars SQL on the dataset result") from e

        # Enforce max result rows on transformed result
        row_count = transformed.select(pl.len()).item()
        if row_count > self.max_result_rows:
            raise InvalidInputError(
                413,
                "dataset_result_too_large",
                f"The transformed dataset result exceeds the maximum allowed of {self.max_result_rows} rows."
            )

        # Replace any existing "_row_num" column with a fresh 1-based index over the transformed rows.
        transformed = transformed.drop("_row_num", strict=False).with_row_index("_row_num", offset=1)
        return DatasetResult(target_model_config=result.target_model_config, df=transformed)

    async def _get_dataset_results_definition(
        self, dataset_name: str, user: AbstractUser, params: dict, headers: dict[str, str]
    ) -> rm.DatasetResultModel:
        """Compute a dataset result and format it as the API response model.

        Configurables are read from ``headers``; the output format comes from
        ``extract_orientation_offset_and_limit(params)``.
        """
        configurables = self.get_configurables_from_headers(headers)
        result = await self._get_dataset_result_object(dataset_name, user, params, configurables)

        result_format = self.extract_orientation_offset_and_limit(params)
        return rm.DatasetResultModel(**result.to_json(result_format))

    def setup_routes(
        self, app: FastAPI, param_fields: dict,
        get_parameters_definition: Callable[..., Coroutine[Any, Any, rm.ParametersModel]]
    ) -> None:
        """Register the parameters and results endpoints of every dataset.

        Also sets ``self._get_dataset_parameters_for_mcp`` and
        ``self._get_dataset_results_for_mcp`` for use by the MCP server builder.

        Args:
            app: The FastAPI application the routes are added to.
            param_fields: Mapping keyed by parameter name; used to validate the
                parameters each dataset references and to build the
                per-dataset query models.
            get_parameters_definition: Coroutine function that produces the
                ``ParametersModel`` for a parameters request.

        Raises:
            ConfigurationError: If a dataset references a parameter that is
                not a key of ``param_fields``.
        """

        dataset_results_path = '/datasets/{dataset}'
        dataset_parameters_path = dataset_results_path + '/parameters'

        def validate_parameters_list(parameters: list[str] | None, entity_type: str, dataset_name: str) -> None:
            """Fail fast if any listed parameter is unknown; ``None`` means nothing to check."""
            if parameters is None:
                return
            for param in parameters:
                if param not in param_fields:
                    all_params = list(param_fields.keys())
                    raise ConfigurationError(
                        f"{entity_type} '{dataset_name}' use parameter '{param}' which doesn't exist. Available parameters are:"
                        f"\n {all_params}"
                    )

        async def get_dataset_parameters_updates(dataset_name: str, user: AbstractUser, params: dict):
            """Look up the dataset's parameter list and scope, then delegate to ``get_parameters_definition``."""
            dataset_config = self.manifest_cfg.datasets[dataset_name]
            return await get_parameters_definition(
                dataset_config.parameters, "dataset", dataset_name, dataset_config.scope, user, params
            )

        # Dataset parameters and results APIs
        for dataset_name, dataset_config in self.manifest_cfg.datasets.items():
            dataset_name_for_api = u.normalize_name_for_api(dataset_name)
            curr_parameters_path = dataset_parameters_path.format(dataset=dataset_name_for_api)
            curr_results_path = dataset_results_path.format(dataset=dataset_name_for_api)

            validate_parameters_list(dataset_config.parameters, "Dataset", dataset_name)

            QueryModelForGetParams, QueryModelForPostParams = get_query_models_for_parameters(param_fields, dataset_config.parameters)
            QueryModelForGetDataset, QueryModelForPostDataset = get_query_models_for_dataset(param_fields, dataset_config.parameters)

            # NOTE(review): the handlers below resolve the dataset name from the request
            # path instead of using the loop variable - presumably to avoid late binding
            # of ``dataset_name`` in these closures.

            @app.get(curr_parameters_path, tags=[f"Dataset '{dataset_name}'"], description=self._parameters_description, response_class=JSONResponse)
            async def get_dataset_parameters(
                request: Request, params: QueryModelForGetParams, user=Depends(self.get_current_user)
            ) -> rm.ParametersModel:
                start = time.time()
                curr_dataset_name = self.get_name_from_path_section(request, -2)
                result = await get_dataset_parameters_updates(curr_dataset_name, user, asdict(params))
                self.logger.log_activity_time(
                    "GET REQUEST for PARAMETERS", start, additional_data={"dataset_name": curr_dataset_name}
                )
                return result

            @app.post(curr_parameters_path, tags=[f"Dataset '{dataset_name}'"], description=self._parameters_description, response_class=JSONResponse)
            async def get_dataset_parameters_with_post(
                request: Request, params: QueryModelForPostParams, user=Depends(self.get_current_user)
            ) -> rm.ParametersModel:
                start = time.time()
                curr_dataset_name = self.get_name_from_path_section(request, -2)
                result = await get_dataset_parameters_updates(curr_dataset_name, user, params.model_dump())
                self.logger.log_activity_time(
                    "POST REQUEST for PARAMETERS", start, additional_data={"dataset_name": curr_dataset_name}
                )
                return result

            @app.get(curr_results_path, tags=[f"Dataset '{dataset_name}'"], description=dataset_config.description, response_class=JSONResponse)
            async def get_dataset_results(
                request: Request, params: QueryModelForGetDataset, user=Depends(self.get_current_user)
            ) -> rm.DatasetResultModel:
                start = time.time()
                curr_dataset_name = self.get_name_from_path_section(request, -1)
                result = await self._get_dataset_results_definition(
                    curr_dataset_name, user, asdict(params), headers=dict(request.headers)
                )
                self.logger.log_activity_time(
                    "GET REQUEST for DATASET RESULTS", start, additional_data={"dataset_name": curr_dataset_name}
                )
                return result

            @app.post(curr_results_path, tags=[f"Dataset '{dataset_name}'"], description=dataset_config.description, response_class=JSONResponse)
            async def get_dataset_results_with_post(
                request: Request, params: QueryModelForPostDataset, user=Depends(self.get_current_user)
            ) -> rm.DatasetResultModel:
                start = time.time()
                curr_dataset_name = self.get_name_from_path_section(request, -1)
                result = await self._get_dataset_results_definition(
                    curr_dataset_name, user, params.model_dump(), headers=dict(request.headers)
                )
                self.logger.log_activity_time(
                    "POST REQUEST for DATASET RESULTS", start, additional_data={"dataset_name": curr_dataset_name}
                )
                return result

        # MCP-callable methods (exposed as instance attributes for McpServerBuilder)

        async def get_dataset_parameters_for_mcp(
            dataset: str, parameter_name: str, selected_ids: str | list[str], user: AbstractUser
        ) -> rm.ParametersModel:
            """Get dataset parameter updates for MCP tools.

            Builds a parameters request in which ``parameter_name`` is the
            parent parameter (``x_parent_param``) with ``selected_ids`` as its
            selection, on behalf of ``user``.
            """
            dataset_name = u.normalize_name(dataset)
            parameters = {
                "x_parent_param": parameter_name,
                parameter_name: selected_ids
            }
            return await get_dataset_parameters_updates(dataset_name, user, parameters)

        async def get_dataset_results_for_mcp(
            dataset: str, parameters: dict[str, Any], sql_query: str | None, user: AbstractUser, configurables: tuple[tuple[str, str], ...]
        ) -> DatasetResult:
            """Get dataset results for MCP tools. Takes user and configurables.

            ``sql_query`` is passed on as the ``x_sql_query`` request parameter.
            """
            dataset_name = u.normalize_name(dataset)
            # Build a new dict rather than updating ``parameters`` in place, so the
            # caller's dict is not modified as a side effect of this call.
            request_params = {**parameters, "x_sql_query": sql_query}
            return await self._get_dataset_result_object(dataset_name, user, request_params, configurables)

        # Store the MCP functions as instance attributes for access by McpServerBuilder
        self._get_dataset_parameters_for_mcp = get_dataset_parameters_for_mcp
        self._get_dataset_results_for_mcp = get_dataset_results_for_mcp
|
|
220
|
+
|