squirrels 0.5.0b3__py3-none-any.whl → 0.6.0.post0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93):
  1. squirrels/__init__.py +4 -0
  2. squirrels/_api_routes/__init__.py +5 -0
  3. squirrels/_api_routes/auth.py +337 -0
  4. squirrels/_api_routes/base.py +196 -0
  5. squirrels/_api_routes/dashboards.py +156 -0
  6. squirrels/_api_routes/data_management.py +148 -0
  7. squirrels/_api_routes/datasets.py +220 -0
  8. squirrels/_api_routes/project.py +289 -0
  9. squirrels/_api_server.py +440 -792
  10. squirrels/_arguments/__init__.py +0 -0
  11. squirrels/_arguments/{_init_time_args.py → init_time_args.py} +23 -43
  12. squirrels/_arguments/{_run_time_args.py → run_time_args.py} +32 -68
  13. squirrels/_auth.py +590 -264
  14. squirrels/_command_line.py +130 -58
  15. squirrels/_compile_prompts.py +147 -0
  16. squirrels/_connection_set.py +16 -15
  17. squirrels/_constants.py +36 -11
  18. squirrels/_dashboards.py +179 -0
  19. squirrels/_data_sources.py +40 -34
  20. squirrels/_dataset_types.py +16 -11
  21. squirrels/_env_vars.py +209 -0
  22. squirrels/_exceptions.py +9 -37
  23. squirrels/_http_error_responses.py +52 -0
  24. squirrels/_initializer.py +7 -6
  25. squirrels/_logging.py +121 -0
  26. squirrels/_manifest.py +155 -77
  27. squirrels/_mcp_server.py +578 -0
  28. squirrels/_model_builder.py +11 -55
  29. squirrels/_model_configs.py +5 -5
  30. squirrels/_model_queries.py +1 -1
  31. squirrels/_models.py +276 -143
  32. squirrels/_package_data/base_project/.env +1 -24
  33. squirrels/_package_data/base_project/.env.example +31 -17
  34. squirrels/_package_data/base_project/connections.yml +4 -3
  35. squirrels/_package_data/base_project/dashboards/dashboard_example.py +13 -7
  36. squirrels/_package_data/base_project/dashboards/dashboard_example.yml +6 -6
  37. squirrels/_package_data/base_project/docker/Dockerfile +2 -2
  38. squirrels/_package_data/base_project/docker/compose.yml +1 -1
  39. squirrels/_package_data/base_project/duckdb_init.sql +1 -0
  40. squirrels/_package_data/base_project/models/builds/build_example.py +2 -2
  41. squirrels/_package_data/base_project/models/dbviews/dbview_example.sql +7 -2
  42. squirrels/_package_data/base_project/models/dbviews/dbview_example.yml +16 -10
  43. squirrels/_package_data/base_project/models/federates/federate_example.py +27 -17
  44. squirrels/_package_data/base_project/models/federates/federate_example.sql +3 -7
  45. squirrels/_package_data/base_project/models/federates/federate_example.yml +7 -7
  46. squirrels/_package_data/base_project/models/sources.yml +5 -6
  47. squirrels/_package_data/base_project/parameters.yml +24 -38
  48. squirrels/_package_data/base_project/pyconfigs/connections.py +8 -3
  49. squirrels/_package_data/base_project/pyconfigs/context.py +26 -14
  50. squirrels/_package_data/base_project/pyconfigs/parameters.py +124 -81
  51. squirrels/_package_data/base_project/pyconfigs/user.py +48 -15
  52. squirrels/_package_data/base_project/resources/public/.gitkeep +0 -0
  53. squirrels/_package_data/base_project/seeds/seed_categories.yml +1 -1
  54. squirrels/_package_data/base_project/seeds/seed_subcategories.yml +1 -1
  55. squirrels/_package_data/base_project/squirrels.yml.j2 +21 -31
  56. squirrels/_package_data/templates/login_successful.html +53 -0
  57. squirrels/_package_data/templates/squirrels_studio.html +22 -0
  58. squirrels/_parameter_configs.py +43 -22
  59. squirrels/_parameter_options.py +1 -1
  60. squirrels/_parameter_sets.py +41 -30
  61. squirrels/_parameters.py +560 -123
  62. squirrels/_project.py +487 -277
  63. squirrels/_py_module.py +71 -10
  64. squirrels/_request_context.py +33 -0
  65. squirrels/_schemas/__init__.py +0 -0
  66. squirrels/_schemas/auth_models.py +83 -0
  67. squirrels/_schemas/query_param_models.py +70 -0
  68. squirrels/_schemas/request_models.py +26 -0
  69. squirrels/_schemas/response_models.py +286 -0
  70. squirrels/_seeds.py +52 -13
  71. squirrels/_sources.py +29 -23
  72. squirrels/_utils.py +221 -42
  73. squirrels/_version.py +1 -3
  74. squirrels/arguments.py +7 -2
  75. squirrels/auth.py +4 -0
  76. squirrels/connections.py +2 -0
  77. squirrels/dashboards.py +3 -1
  78. squirrels/data_sources.py +6 -0
  79. squirrels/parameter_options.py +5 -0
  80. squirrels/parameters.py +5 -0
  81. squirrels/types.py +10 -3
  82. squirrels-0.6.0.post0.dist-info/METADATA +148 -0
  83. squirrels-0.6.0.post0.dist-info/RECORD +101 -0
  84. {squirrels-0.5.0b3.dist-info → squirrels-0.6.0.post0.dist-info}/WHEEL +1 -1
  85. squirrels/_api_response_models.py +0 -190
  86. squirrels/_dashboard_types.py +0 -82
  87. squirrels/_dashboards_io.py +0 -79
  88. squirrels-0.5.0b3.dist-info/METADATA +0 -110
  89. squirrels-0.5.0b3.dist-info/RECORD +0 -80
  90. /squirrels/_package_data/base_project/{assets → resources}/expenses.db +0 -0
  91. /squirrels/_package_data/base_project/{assets → resources}/weather.db +0 -0
  92. {squirrels-0.5.0b3.dist-info → squirrels-0.6.0.post0.dist-info}/entry_points.txt +0 -0
  93. {squirrels-0.5.0b3.dist-info → squirrels-0.6.0.post0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,156 @@
1
+ """
2
+ Dashboard routes for parameters and results
3
+ """
4
+ from typing import Callable, Coroutine, Any
5
+ from fastapi import FastAPI, Depends, Request
6
+ from fastapi.responses import Response, HTMLResponse
7
+ from fastapi.security import HTTPBearer
8
+ from dataclasses import asdict
9
+ from cachetools import TTLCache
10
+ import time
11
+
12
+ from .. import _constants as c, _utils as u
13
+ from .._schemas import response_models as rm
14
+ from .._exceptions import ConfigurationError
15
+ from .._dashboards import Dashboard
16
+ from .._schemas.query_param_models import get_query_models_for_parameters, get_query_models_for_dashboard
17
+ from .._schemas.auth_models import AbstractUser
18
+ from .base import RouteBase
19
+
20
+
21
class DashboardRoutes(RouteBase):
    """Dashboard parameter and result routes.

    For every dashboard registered on the project, ``setup_routes`` adds
    GET and POST endpoints for the dashboard's parameters and for its
    rendered result (PNG or HTML).
    """

    def __init__(self, get_bearer_token: HTTPBearer, project, no_cache: bool = False):
        """Initialise the shared route state and the dashboard results cache.

        Args:
            get_bearer_token: Bearer-token security scheme forwarded to ``RouteBase``.
            project: The project object forwarded to ``RouteBase``.
            no_cache: When true, dashboard results bypass the results cache.
        """
        super().__init__(get_bearer_token, project, no_cache)

        # Cache of rendered dashboards; the TTL setting is expressed in minutes,
        # so it is multiplied by 60 before being handed to TTLCache.
        self.dashboard_results_cache = TTLCache(
            maxsize=self.env_vars.dashboards_cache_size,
            ttl=self.env_vars.dashboards_cache_ttl_minutes*60
        )

    async def _get_dashboard_results_helper(
        self, dashboard: str, user: AbstractUser, selections: tuple[tuple[str, Any], ...], configurables: tuple[tuple[str, str], ...]
    ) -> Dashboard:
        """Render a dashboard without consulting the cache.

        ``selections`` and ``configurables`` arrive as tuples of pairs (so they
        can be used as part of a cache key) and are converted back to dicts here.
        Configurables not declared in the manifest are dropped.
        """
        cfg_filtered = {k: v for k, v in dict(configurables).items() if k in self.manifest_cfg.configurables}
        return await self.project.dashboard(dashboard, user=user, selections=dict(selections), configurables=cfg_filtered)

    async def _get_dashboard_results_cachable(
        self, dashboard: str, user: AbstractUser, selections: tuple[tuple[str, Any], ...], configurables: tuple[tuple[str, str], ...]
    ) -> Dashboard:
        """Render a dashboard through ``do_cachable_action`` using the dashboard results cache."""
        return await self.do_cachable_action(
            self.dashboard_results_cache, self._get_dashboard_results_helper, dashboard, user, selections, configurables
        )

    async def _get_dashboard_results_definition(
        self, dashboard_name: str, user: AbstractUser, params: dict, headers: dict[str, str]
    ) -> Response:
        """Render a dashboard and wrap it in the matching HTTP response.

        Args:
            dashboard_name: Name of the dashboard to render.
            user: The authenticated user making the request.
            params: Request parameters (query string or body) as a dict.
            headers: Request headers; configurables are read from these, but
                only for users with elevated privileges.

        Returns:
            A ``Response`` with media type ``image/png`` for PNG dashboards, or
            an ``HTMLResponse`` for HTML dashboards.

        Raises:
            NotImplementedError: If the dashboard format is neither PNG nor HTML.
        """
        get_dashboard_function = self._get_dashboard_results_helper if self.no_cache else self._get_dashboard_results_cachable
        selections = self.get_selections_as_immutable(params, uncached_keys=set())

        # Configurables from headers are honoured only for elevated users;
        # everyone else gets an empty set of configurables.
        user_has_elevated_privileges = u.user_has_elevated_privileges(user.access_level, self.env_vars.elevated_access_level)
        configurables = self.get_configurables_from_headers(headers) if user_has_elevated_privileges else tuple()
        dashboard_obj = await get_dashboard_function(dashboard_name, user, selections, configurables)

        if dashboard_obj._format == c.PNG:
            assert isinstance(dashboard_obj._content, bytes)
            result = Response(dashboard_obj._content, media_type="image/png")
        elif dashboard_obj._format == c.HTML:
            result = HTMLResponse(dashboard_obj._content)
        else:
            raise NotImplementedError(f"Unsupported dashboard format: {dashboard_obj._format!r}")
        return result

    def setup_routes(
        self, app: FastAPI, param_fields: dict,
        get_parameters_definition: Callable[..., Coroutine[Any, Any, rm.ParametersModel]]
    ) -> None:
        """Register the parameters and results endpoints for every dashboard.

        Args:
            app: The FastAPI application to attach routes to.
            param_fields: Mapping keyed by parameter name; used both to validate
                each dashboard's parameter list and to build the query models.
            get_parameters_definition: Coroutine function that produces the
                ``ParametersModel`` for a given entity.

        Raises:
            ConfigurationError: If a dashboard lists a parameter that is not a
                key of ``param_fields``.
        """

        dashboard_results_path = '/dashboards/{dashboard}'
        dashboard_parameters_path = dashboard_results_path + '/parameters'

        def validate_parameters_list(parameters: list[str] | None, entity_type: str, dashboard_name: str) -> None:
            """Raise ``ConfigurationError`` on the first parameter missing from ``param_fields``."""
            if parameters is None:
                return
            for param in parameters:
                if param not in param_fields:
                    all_params = list(param_fields.keys())
                    raise ConfigurationError(
                        f"{entity_type} '{dashboard_name}' use parameter '{param}' which doesn't exist. Available parameters are:"
                        f"\n  {all_params}"
                    )

        # Dashboard parameters and results APIs
        for dashboard_name, dashboard in self.project._dashboards.items():
            dashboard_name_for_api = u.normalize_name_for_api(dashboard_name)
            curr_parameters_path = dashboard_parameters_path.format(dashboard=dashboard_name_for_api)
            curr_results_path = dashboard_results_path.format(dashboard=dashboard_name_for_api)

            validate_parameters_list(dashboard.config.parameters, "Dashboard", dashboard_name)

            QueryModelForGetParams, QueryModelForPostParams = get_query_models_for_parameters(param_fields, dashboard.config.parameters)
            QueryModelForGetDash, QueryModelForPostDash = get_query_models_for_dashboard(param_fields, dashboard.config.parameters)

            # NOTE: the handlers below are redefined on every loop iteration. Each one
            # recovers the dashboard name from the request path instead of reading the
            # loop variable (presumably to avoid late binding of `dashboard_name`).

            @app.get(curr_parameters_path, tags=[f"Dashboard '{dashboard_name}'"], description=self._parameters_description)
            async def get_dashboard_parameters(
                request: Request, params: QueryModelForGetParams, user=Depends(self.get_current_user)
            ) -> rm.ParametersModel:
                start = time.time()
                # Path is /dashboards/<name>/parameters, so the name is the second-to-last section.
                curr_dashboard_name = self.get_name_from_path_section(request, -2)
                parameters_list = self.project._dashboards[curr_dashboard_name].config.parameters
                scope = self.project._dashboards[curr_dashboard_name].config.scope
                result = await get_parameters_definition(
                    parameters_list, "dashboard", curr_dashboard_name, scope, user, asdict(params)
                )
                self.logger.log_activity_time(
                    "GET REQUEST for PARAMETERS", start, additional_data={"dashboard_name": curr_dashboard_name}
                )
                return result

            @app.post(curr_parameters_path, tags=[f"Dashboard '{dashboard_name}'"], description=self._parameters_description)
            async def get_dashboard_parameters_with_post(
                request: Request, params: QueryModelForPostParams, user=Depends(self.get_current_user)
            ) -> rm.ParametersModel:
                start = time.time()
                curr_dashboard_name = self.get_name_from_path_section(request, -2)
                parameters_list = self.project._dashboards[curr_dashboard_name].config.parameters
                scope = self.project._dashboards[curr_dashboard_name].config.scope
                result = await get_parameters_definition(
                    parameters_list, "dashboard", curr_dashboard_name, scope, user, params.model_dump()
                )
                self.logger.log_activity_time(
                    "POST REQUEST for PARAMETERS", start, additional_data={"dashboard_name": curr_dashboard_name}
                )
                return result

            # response_class=Response matches the POST twin below: both routes return a
            # raw PNG/HTML Response rather than JSON.
            @app.get(curr_results_path, tags=[f"Dashboard '{dashboard_name}'"], description=dashboard.config.description, response_class=Response)
            async def get_dashboard_results(
                request: Request, params: QueryModelForGetDash, user=Depends(self.get_current_user)
            ) -> Response:
                start = time.time()
                # Path is /dashboards/<name>, so the name is the last section.
                curr_dashboard_name = self.get_name_from_path_section(request, -1)
                result = await self._get_dashboard_results_definition(
                    curr_dashboard_name, user, asdict(params), headers=dict(request.headers)
                )
                self.logger.log_activity_time(
                    "GET REQUEST for DASHBOARD RESULTS", start, additional_data={"dashboard_name": curr_dashboard_name}
                )
                return result

            @app.post(curr_results_path, tags=[f"Dashboard '{dashboard_name}'"], description=dashboard.config.description, response_class=Response)
            async def get_dashboard_results_with_post(
                request: Request, params: QueryModelForPostDash, user=Depends(self.get_current_user)
            ) -> Response:
                start = time.time()
                curr_dashboard_name = self.get_name_from_path_section(request, -1)
                result = await self._get_dashboard_results_definition(
                    curr_dashboard_name, user, params.model_dump(), headers=dict(request.headers)
                )
                self.logger.log_activity_time(
                    "POST REQUEST for DASHBOARD RESULTS", start, additional_data={"dashboard_name": curr_dashboard_name}
                )
                return result
156
+
@@ -0,0 +1,148 @@
1
+ """
2
+ Data management routes for build and query models
3
+ """
4
+ from typing import Any, Annotated
5
+ from fastapi import FastAPI, Depends, Request, Response, status, Path
6
+ from fastapi.responses import JSONResponse
7
+ from fastapi.security import HTTPBearer
8
+ from dataclasses import asdict
9
+ from cachetools import TTLCache
10
+ import time
11
+
12
+ from .. import _constants as c, _utils as u
13
+ from .._schemas import response_models as rm
14
+ from .._exceptions import InvalidInputError
15
+ from .._schemas.auth_models import AbstractUser
16
+ from .._dataset_types import DatasetResult
17
+ from .._schemas.query_param_models import get_query_models_for_querying_models, get_query_models_for_compiled_models
18
+ from .base import RouteBase
19
+
20
+
21
class DataManagementRoutes(RouteBase):
    """Data management routes for build and query operations.

    Exposes ``/build``, ``/query-result`` and ``/compiled-models/{model_name}``.
    Every operation requires the caller to have elevated privileges.
    """

    def __init__(self, get_bearer_token: HTTPBearer, project, no_cache: bool = False):
        """Initialise the shared route state and the query-models cache.

        Args:
            get_bearer_token: Bearer-token security scheme forwarded to ``RouteBase``.
            project: The project object forwarded to ``RouteBase``.
            no_cache: When true, query results bypass the cache.
        """
        super().__init__(get_bearer_token, project, no_cache)

        # Setup cache (same settings as dataset results cache); the TTL setting
        # is expressed in minutes, so it is multiplied by 60.
        self.query_models_cache = TTLCache(
            maxsize=self.env_vars.datasets_cache_size,
            ttl=self.env_vars.datasets_cache_ttl_minutes*60
        )

    async def _query_models_helper(
        self, sql_query: str, user: AbstractUser, selections: tuple[tuple[str, Any], ...], configurables: tuple[tuple[str, str], ...]
    ) -> DatasetResult:
        """Run ``sql_query`` against the project's models without consulting the cache.

        ``selections`` and ``configurables`` arrive as tuples of pairs and are
        converted back to dicts. Configurables not declared in the manifest are dropped.
        """
        cfg_filtered = {k: v for k, v in dict(configurables).items() if k in self.manifest_cfg.configurables}
        return await self.project.query_models(sql_query, user=user, selections=dict(selections), configurables=cfg_filtered)

    async def _query_models_cachable(
        self, sql_query: str, user: AbstractUser, selections: tuple[tuple[str, Any], ...], configurables: tuple[tuple[str, str], ...]
    ) -> DatasetResult:
        """Run ``sql_query`` through ``do_cachable_action`` using the query-models cache."""
        return await self.do_cachable_action(
            self.query_models_cache, self._query_models_helper, sql_query, user, selections, configurables
        )

    async def _query_models_definition(
        self, user: AbstractUser, params: dict, *, headers: dict[str, str]
    ) -> rm.DatasetResultModel:
        """Validate the request, run the SQL query, and format the result.

        Args:
            user: The authenticated user making the request.
            params: Request parameters; must contain a non-empty ``x_sql_query``.
            headers: Request headers, from which configurables are read.

        Raises:
            InvalidInputError: 403 if the user lacks elevated privileges;
                400 if no SQL query (or an empty one) was supplied.
        """
        if not u.user_has_elevated_privileges(user.access_level, self.env_vars.elevated_access_level):
            raise InvalidInputError(403, "unauthorized_access_to_query_models", f"User '{user}' does not have permission to query data models")

        sql_query = params.get("x_sql_query")
        # An empty query is treated the same as a missing one (the dataset
        # routes likewise ignore an empty x_sql_query) instead of being
        # forwarded to the query engine.
        if not sql_query:
            raise InvalidInputError(400, "sql_query_required", "SQL query must be provided")

        query_models_function = self._query_models_helper if self.no_cache else self._query_models_cachable
        # These keys are excluded from the immutable selections; the SQL query is
        # passed to the query function as its own argument instead.
        uncached_keys = {"x_sql_query", "x_orientation", "x_offset", "x_limit"}
        selections = self.get_selections_as_immutable(params, uncached_keys)
        configurables = self.get_configurables_from_headers(headers)
        result = await query_models_function(sql_query, user, selections, configurables)

        result_format = self.extract_orientation_offset_and_limit(params)
        return rm.DatasetResultModel(**result.to_json(result_format))

    async def _get_compiled_model_definition(
        self, model_name: str, user: AbstractUser, params: dict, *, headers: dict[str, str]
    ) -> rm.CompiledQueryModel:
        """Return the compiled query for a model.

        Args:
            model_name: Model name as given in the URL; normalised before lookup.
            user: The authenticated user making the request.
            params: Request parameters used as selections.
            headers: Request headers, from which configurables are read.

        Raises:
            InvalidInputError: 403 if the user lacks elevated privileges.
        """
        normalized_model_name = u.normalize_name(model_name)

        # Internal users only
        if not u.user_has_elevated_privileges(user.access_level, self.env_vars.elevated_access_level):
            raise InvalidInputError(403, "unauthorized_access_to_compile_model", f"User '{user}' does not have permission to fetch compiled SQL")

        selections = self.get_selections_as_immutable(params, uncached_keys=set())
        configurables = self.get_configurables_from_headers(headers)
        # Only pass configurables that are declared in the manifest.
        cfg_filtered = {k: v for k, v in dict(configurables).items() if k in self.manifest_cfg.configurables}
        return await self.project.get_compiled_model_query(
            normalized_model_name, user=user, selections=dict(selections), configurables=cfg_filtered
        )

    def setup_routes(self, app: FastAPI, param_fields: dict) -> None:
        """Register the build, query-result and compiled-model endpoints on ``app``.

        Args:
            app: The FastAPI application to attach routes to.
            param_fields: Mapping of parameter fields used to build the query models.
        """

        # Build project endpoint
        build_path = '/build'

        @app.post(build_path, tags=["Data Management"], summary="Build or update the Virtual Data Lake (VDL) for the project")
        async def build(user=Depends(self.get_current_user)) -> Response:
            if not u.user_has_elevated_privileges(user.access_level, self.env_vars.elevated_access_level):
                raise InvalidInputError(403, "unauthorized_access_to_build_model", f"User '{user}' does not have permission to build the virtual data lake (VDL)")
            await self.project.build()
            return Response(status_code=status.HTTP_200_OK)

        # Query result endpoints
        query_models_path = '/query-result'
        QueryModelForQueryModels, QueryModelForPostQueryModels = get_query_models_for_querying_models(param_fields)

        @app.get(query_models_path, tags=["Data Management"], response_class=JSONResponse)
        async def query_models(
            request: Request, params: QueryModelForQueryModels, user=Depends(self.get_current_user)
        ) -> rm.DatasetResultModel:
            start = time.time()
            result = await self._query_models_definition(user, asdict(params), headers=dict(request.headers))
            self.logger.log_activity_time("GET REQUEST for QUERY MODELS", start)
            return result

        @app.post(query_models_path, tags=["Data Management"], response_class=JSONResponse)
        async def query_models_with_post(
            request: Request, params: QueryModelForPostQueryModels, user=Depends(self.get_current_user)
        ) -> rm.DatasetResultModel:
            start = time.time()
            result = await self._query_models_definition(user, params.model_dump(), headers=dict(request.headers))
            self.logger.log_activity_time("POST REQUEST for QUERY MODELS", start)
            return result

        # Compiled models endpoints
        compiled_models_path = '/compiled-models/{model_name}'
        QueryModelForGetCompiled, QueryModelForPostCompiled = get_query_models_for_compiled_models(param_fields)

        @app.get(compiled_models_path, tags=["Data Management"], response_class=JSONResponse, summary="Get compiled definition for a model")
        async def get_compiled_model(
            request: Request,
            model_name: Annotated[str, Path(description="The name of the model. Both snake case (with underscores) and kebab case (with dashes) are supported")],
            params: QueryModelForGetCompiled,
            user=Depends(self.get_current_user)
        ) -> rm.CompiledQueryModel:
            start = time.time()
            result = await self._get_compiled_model_definition(model_name, user, asdict(params), headers=dict(request.headers))
            self.logger.log_activity_time(
                "GET REQUEST for GET COMPILED MODEL", start, additional_data={"model_name": model_name}
            )
            return result

        @app.post(compiled_models_path, tags=["Data Management"], response_class=JSONResponse, summary="Get compiled definition for a model")
        async def get_compiled_model_with_post(
            request: Request,
            model_name: Annotated[str, Path(description="The name of the model. Both snake case (with underscores) and kebab case (with dashes) are supported")],
            params: QueryModelForPostCompiled,
            user=Depends(self.get_current_user)
        ) -> rm.CompiledQueryModel:
            start = time.time()
            result = await self._get_compiled_model_definition(model_name, user, params.model_dump(), headers=dict(request.headers))
            self.logger.log_activity_time(
                "POST REQUEST for GET COMPILED MODEL", start, additional_data={"model_name": model_name}
            )
            return result
148
+
@@ -0,0 +1,220 @@
1
+ """
2
+ Dataset routes for parameters and results
3
+ """
4
+ from typing import Callable, Coroutine, Any
5
+ from fastapi import FastAPI, Depends, Request
6
+ from fastapi.responses import JSONResponse
7
+ from fastapi.security import HTTPBearer
8
+ from dataclasses import asdict
9
+ from cachetools import TTLCache
10
+
11
+ import time
12
+ import polars as pl
13
+
14
+ from .. import _utils as u
15
+ from .._schemas import response_models as rm
16
+ from .._exceptions import ConfigurationError, InvalidInputError
17
+ from .._dataset_types import DatasetResult
18
+ from .._schemas.query_param_models import get_query_models_for_parameters, get_query_models_for_dataset
19
+ from .._schemas.auth_models import AbstractUser
20
+ from .base import RouteBase
21
+
22
+
23
class DatasetRoutes(RouteBase):
    """Dataset parameter and result routes.

    For every dataset in the manifest, ``setup_routes`` adds GET and POST
    endpoints for the dataset's parameters and for its results. It also
    stores two coroutine functions on the instance for use by the MCP server.
    """

    def __init__(self, get_bearer_token: HTTPBearer, project, no_cache: bool = False):
        """Initialise the shared route state, the results cache and the result limits.

        Args:
            get_bearer_token: Bearer-token security scheme forwarded to ``RouteBase``.
            project: The project object forwarded to ``RouteBase``.
            no_cache: When true, dataset results bypass the results cache.
        """
        super().__init__(get_bearer_token, project, no_cache)

        # Setup caches; the TTL setting is expressed in minutes, so it is multiplied by 60.
        self.dataset_results_cache = TTLCache(
            maxsize=self.env_vars.datasets_cache_size,
            ttl=self.env_vars.datasets_cache_ttl_minutes*60
        )

        # Setup max rows
        self.max_result_rows = self.env_vars.datasets_max_rows_output

        # Setup SQL query timeout
        self.sql_timeout_seconds = self.env_vars.datasets_sql_timeout_seconds

    async def _get_dataset_results_helper(
        self, dataset: str, user: AbstractUser, selections: tuple[tuple[str, Any], ...], configurables: tuple[tuple[str, str], ...]
    ) -> DatasetResult:
        """Compute a dataset result without consulting the cache.

        ``selections`` and ``configurables`` arrive as tuples of pairs and are
        converted back to dicts before being handed to the project.
        """
        # Only pass configurables that are defined in manifest
        cfg_filtered = {k: v for k, v in dict(configurables).items() if k in self.manifest_cfg.configurables}
        return await self.project._dataset_result(dataset, user=user, selections=dict(selections), configurables=cfg_filtered)

    async def _get_dataset_results_cachable(
        self, dataset: str, user: AbstractUser, selections: tuple[tuple[str, Any], ...], configurables: tuple[tuple[str, str], ...]
    ) -> DatasetResult:
        """Compute a dataset result through ``do_cachable_action`` using the results cache."""
        return await self.do_cachable_action(
            self.dataset_results_cache, self._get_dataset_results_helper, dataset, user, selections, configurables
        )

    async def _get_dataset_result_object(
        self, dataset_name: str, user: AbstractUser, params: dict, configurables: tuple[tuple[str, str], ...]
    ) -> DatasetResult:
        """Fetch a dataset result and optionally post-process it with a Polars SQL query.

        Args:
            dataset_name: Name of the dataset to evaluate.
            user: The authenticated user making the request.
            params: Request parameters; a truthy ``x_sql_query`` entry is run
                against the result, which is exposed to the query as table ``result``.
            configurables: Configurables as pairs; ignored unless the user has
                elevated privileges.

        Raises:
            InvalidInputError: 400 if the SQL transformation fails; 413 if the
                transformed result has more than ``self.max_result_rows`` rows.
        """
        get_dataset_function = self._get_dataset_results_helper if self.no_cache else self._get_dataset_results_cachable
        # These keys are excluded from the immutable selections.
        uncached_keys = {"x_sql_query", "x_orientation", "x_offset", "x_limit"}
        selections = self.get_selections_as_immutable(params, uncached_keys)

        user_has_elevated_privileges = u.user_has_elevated_privileges(user.access_level, self.env_vars.elevated_access_level)
        configurables_if_elevated: tuple[tuple[str, str], ...] = configurables if user_has_elevated_privileges else tuple()
        result = await get_dataset_function(dataset_name, user, selections, configurables_if_elevated)

        # Apply optional final SQL transformation before select/limit/offset
        sql_query = params.get("x_sql_query")
        if sql_query:
            try:
                # One row more than the limit is requested, presumably so that an
                # over-limit result can be detected by the row-count check below.
                transformed = await u.run_polars_sql_on_dataframes(
                    sql_query, {"result": result.df.lazy()}, timeout_seconds=self.sql_timeout_seconds, max_rows=self.max_result_rows+1
                )
            except Exception as e:
                raise InvalidInputError(400, "invalid_sql_query", "Failed to run provided Polars SQL on the dataset result") from e

            # Enforce max result rows on transformed result
            row_count = transformed.select(pl.len()).item()
            if row_count > self.max_result_rows:
                raise InvalidInputError(
                    413,
                    "dataset_result_too_large",
                    f"The transformed dataset result exceeds the maximum allowed of {self.max_result_rows} rows."
                )

            # Replace any existing "_row_num" column with a fresh 1-based index
            # over the transformed rows.
            transformed = transformed.drop("_row_num", strict=False).with_row_index("_row_num", offset=1)
            result = DatasetResult(target_model_config=result.target_model_config, df=transformed)

        return result

    async def _get_dataset_results_definition(
        self, dataset_name: str, user: AbstractUser, params: dict, headers: dict[str, str]
    ) -> rm.DatasetResultModel:
        """Fetch a dataset result and serialise it into a ``DatasetResultModel``.

        Configurables are read from ``headers``; the output format is derived
        from ``params`` via ``extract_orientation_offset_and_limit``.
        """
        configurables = self.get_configurables_from_headers(headers)
        result = await self._get_dataset_result_object(dataset_name, user, params, configurables)

        result_format = self.extract_orientation_offset_and_limit(params)
        return rm.DatasetResultModel(**result.to_json(result_format))

    def setup_routes(
        self, app: FastAPI, param_fields: dict,
        get_parameters_definition: Callable[..., Coroutine[Any, Any, rm.ParametersModel]]
    ) -> None:
        """Register the parameters and results endpoints for every dataset.

        Also stores ``_get_dataset_parameters_for_mcp`` and
        ``_get_dataset_results_for_mcp`` on the instance.

        Args:
            app: The FastAPI application to attach routes to.
            param_fields: Mapping keyed by parameter name; used both to validate
                each dataset's parameter list and to build the query models.
            get_parameters_definition: Coroutine function that produces the
                ``ParametersModel`` for a given entity.

        Raises:
            ConfigurationError: If a dataset lists a parameter that is not a
                key of ``param_fields``.
        """

        dataset_results_path = '/datasets/{dataset}'
        dataset_parameters_path = dataset_results_path + '/parameters'

        def validate_parameters_list(parameters: list[str] | None, entity_type: str, dataset_name: str) -> None:
            """Raise ``ConfigurationError`` on the first parameter missing from ``param_fields``."""
            if parameters is None:
                return
            for param in parameters:
                if param not in param_fields:
                    all_params = list(param_fields.keys())
                    raise ConfigurationError(
                        f"{entity_type} '{dataset_name}' use parameter '{param}' which doesn't exist. Available parameters are:"
                        f"\n  {all_params}"
                    )

        async def get_dataset_parameters_updates(dataset_name: str, user: AbstractUser, params: dict):
            """Look up the dataset's parameter list and scope, then delegate to ``get_parameters_definition``."""
            parameters_list = self.manifest_cfg.datasets[dataset_name].parameters
            scope = self.manifest_cfg.datasets[dataset_name].scope
            result = await get_parameters_definition(
                parameters_list, "dataset", dataset_name, scope, user, params
            )
            return result

        # Dataset parameters and results APIs
        for dataset_name, dataset_config in self.manifest_cfg.datasets.items():
            dataset_name_for_api = u.normalize_name_for_api(dataset_name)
            curr_parameters_path = dataset_parameters_path.format(dataset=dataset_name_for_api)
            curr_results_path = dataset_results_path.format(dataset=dataset_name_for_api)

            validate_parameters_list(dataset_config.parameters, "Dataset", dataset_name)

            QueryModelForGetParams, QueryModelForPostParams = get_query_models_for_parameters(param_fields, dataset_config.parameters)
            QueryModelForGetDataset, QueryModelForPostDataset = get_query_models_for_dataset(param_fields, dataset_config.parameters)

            # NOTE: the handlers below are redefined on every loop iteration. Each one
            # recovers the dataset name from the request path instead of reading the
            # loop variable (presumably to avoid late binding of `dataset_name`).

            @app.get(curr_parameters_path, tags=[f"Dataset '{dataset_name}'"], description=self._parameters_description, response_class=JSONResponse)
            async def get_dataset_parameters(
                request: Request, params: QueryModelForGetParams, user=Depends(self.get_current_user)
            ) -> rm.ParametersModel:
                start = time.time()
                # Path is /datasets/<name>/parameters, so the name is the second-to-last section.
                curr_dataset_name = self.get_name_from_path_section(request, -2)
                result = await get_dataset_parameters_updates(curr_dataset_name, user, asdict(params))
                self.logger.log_activity_time(
                    "GET REQUEST for PARAMETERS", start, additional_data={"dataset_name": curr_dataset_name}
                )
                return result

            @app.post(curr_parameters_path, tags=[f"Dataset '{dataset_name}'"], description=self._parameters_description, response_class=JSONResponse)
            async def get_dataset_parameters_with_post(
                request: Request, params: QueryModelForPostParams, user=Depends(self.get_current_user)
            ) -> rm.ParametersModel:
                start = time.time()
                curr_dataset_name = self.get_name_from_path_section(request, -2)
                result = await get_dataset_parameters_updates(curr_dataset_name, user, params.model_dump())
                self.logger.log_activity_time(
                    "POST REQUEST for PARAMETERS", start, additional_data={"dataset_name": curr_dataset_name}
                )
                return result

            @app.get(curr_results_path, tags=[f"Dataset '{dataset_name}'"], description=dataset_config.description, response_class=JSONResponse)
            async def get_dataset_results(
                request: Request, params: QueryModelForGetDataset, user=Depends(self.get_current_user)
            ) -> rm.DatasetResultModel:
                start = time.time()
                # Path is /datasets/<name>, so the name is the last section.
                curr_dataset_name = self.get_name_from_path_section(request, -1)
                result = await self._get_dataset_results_definition(
                    curr_dataset_name, user, asdict(params), headers=dict(request.headers)
                )
                self.logger.log_activity_time(
                    "GET REQUEST for DATASET RESULTS", start, additional_data={"dataset_name": curr_dataset_name}
                )
                return result

            @app.post(curr_results_path, tags=[f"Dataset '{dataset_name}'"], description=dataset_config.description, response_class=JSONResponse)
            async def get_dataset_results_with_post(
                request: Request, params: QueryModelForPostDataset, user=Depends(self.get_current_user)
            ) -> rm.DatasetResultModel:
                start = time.time()
                curr_dataset_name = self.get_name_from_path_section(request, -1)
                result = await self._get_dataset_results_definition(
                    curr_dataset_name, user, params.model_dump(), headers=dict(request.headers)
                )
                self.logger.log_activity_time(
                    "POST REQUEST for DATASET RESULTS", start, additional_data={"dataset_name": curr_dataset_name}
                )
                return result

        # MCP-callable methods (exposed as instance attributes for McpServerBuilder)

        async def get_dataset_parameters_for_mcp(
            dataset: str, parameter_name: str, selected_ids: str | list[str], user: AbstractUser
        ) -> rm.ParametersModel:
            """Get dataset parameter updates for MCP tools.

            Builds a parameters dict that marks ``parameter_name`` as the parent
            parameter and carries its selected ids, then delegates to
            ``get_dataset_parameters_updates`` on behalf of ``user``.
            """
            dataset_name = u.normalize_name(dataset)
            parameters = {
                "x_parent_param": parameter_name,
                parameter_name: selected_ids
            }
            return await get_dataset_parameters_updates(dataset_name, user, parameters)

        async def get_dataset_results_for_mcp(
            dataset: str, parameters: dict[str, Any], sql_query: str | None, user: AbstractUser, configurables: tuple[tuple[str, str], ...]
        ) -> DatasetResult:
            """Get dataset results for MCP tools. Takes user and configurables."""
            dataset_name = u.normalize_name(dataset)
            # Build a new dict rather than updating `parameters` in place, so the
            # caller's dict is not mutated as a side effect of this call.
            params_with_sql = {**parameters, "x_sql_query": sql_query}
            return await self._get_dataset_result_object(dataset_name, user, params_with_sql, configurables)

        # Store the MCP functions as instance attributes for access by McpServerBuilder
        self._get_dataset_parameters_for_mcp = get_dataset_parameters_for_mcp
        self._get_dataset_results_for_mcp = get_dataset_results_for_mcp
220
+