squirrels 0.4.1__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of squirrels might be problematic. Click here for more details.
- dateutils/__init__.py +6 -0
- dateutils/_enums.py +25 -0
- squirrels/dateutils.py → dateutils/_implementation.py +58 -111
- dateutils/types.py +6 -0
- squirrels/__init__.py +13 -11
- squirrels/_api_routes/__init__.py +5 -0
- squirrels/_api_routes/auth.py +271 -0
- squirrels/_api_routes/base.py +165 -0
- squirrels/_api_routes/dashboards.py +150 -0
- squirrels/_api_routes/data_management.py +145 -0
- squirrels/_api_routes/datasets.py +257 -0
- squirrels/_api_routes/oauth2.py +298 -0
- squirrels/_api_routes/project.py +252 -0
- squirrels/_api_server.py +256 -450
- squirrels/_arguments/__init__.py +0 -0
- squirrels/_arguments/init_time_args.py +108 -0
- squirrels/_arguments/run_time_args.py +147 -0
- squirrels/_auth.py +960 -0
- squirrels/_command_line.py +126 -45
- squirrels/_compile_prompts.py +147 -0
- squirrels/_connection_set.py +48 -26
- squirrels/_constants.py +68 -38
- squirrels/_dashboards.py +160 -0
- squirrels/_data_sources.py +570 -0
- squirrels/_dataset_types.py +84 -0
- squirrels/_exceptions.py +29 -0
- squirrels/_initializer.py +177 -80
- squirrels/_logging.py +115 -0
- squirrels/_manifest.py +208 -79
- squirrels/_model_builder.py +69 -0
- squirrels/_model_configs.py +74 -0
- squirrels/_model_queries.py +52 -0
- squirrels/_models.py +926 -367
- squirrels/_package_data/base_project/.env +42 -0
- squirrels/_package_data/base_project/.env.example +42 -0
- squirrels/_package_data/base_project/assets/expenses.db +0 -0
- squirrels/_package_data/base_project/connections.yml +16 -0
- squirrels/_package_data/base_project/dashboards/dashboard_example.py +34 -0
- squirrels/_package_data/base_project/dashboards/dashboard_example.yml +22 -0
- squirrels/{package_data → _package_data}/base_project/docker/.dockerignore +5 -2
- squirrels/{package_data → _package_data}/base_project/docker/Dockerfile +3 -3
- squirrels/{package_data → _package_data}/base_project/docker/compose.yml +1 -1
- squirrels/_package_data/base_project/duckdb_init.sql +10 -0
- squirrels/{package_data/base_project/.gitignore → _package_data/base_project/gitignore} +3 -2
- squirrels/_package_data/base_project/macros/macros_example.sql +17 -0
- squirrels/_package_data/base_project/models/builds/build_example.py +26 -0
- squirrels/_package_data/base_project/models/builds/build_example.sql +16 -0
- squirrels/_package_data/base_project/models/builds/build_example.yml +57 -0
- squirrels/_package_data/base_project/models/dbviews/dbview_example.sql +12 -0
- squirrels/_package_data/base_project/models/dbviews/dbview_example.yml +26 -0
- squirrels/_package_data/base_project/models/federates/federate_example.py +37 -0
- squirrels/_package_data/base_project/models/federates/federate_example.sql +19 -0
- squirrels/_package_data/base_project/models/federates/federate_example.yml +65 -0
- squirrels/_package_data/base_project/models/sources.yml +38 -0
- squirrels/{package_data → _package_data}/base_project/parameters.yml +56 -40
- squirrels/_package_data/base_project/pyconfigs/connections.py +14 -0
- squirrels/{package_data → _package_data}/base_project/pyconfigs/context.py +21 -40
- squirrels/_package_data/base_project/pyconfigs/parameters.py +141 -0
- squirrels/_package_data/base_project/pyconfigs/user.py +44 -0
- squirrels/_package_data/base_project/seeds/seed_categories.yml +15 -0
- squirrels/_package_data/base_project/seeds/seed_subcategories.csv +15 -0
- squirrels/_package_data/base_project/seeds/seed_subcategories.yml +21 -0
- squirrels/_package_data/base_project/squirrels.yml.j2 +61 -0
- squirrels/_package_data/templates/dataset_results.html +112 -0
- squirrels/_package_data/templates/oauth_login.html +271 -0
- squirrels/_package_data/templates/squirrels_studio.html +20 -0
- squirrels/_package_loader.py +8 -4
- squirrels/_parameter_configs.py +104 -103
- squirrels/_parameter_options.py +348 -0
- squirrels/_parameter_sets.py +57 -47
- squirrels/_parameters.py +1664 -0
- squirrels/_project.py +721 -0
- squirrels/_py_module.py +7 -5
- squirrels/_schemas/__init__.py +0 -0
- squirrels/_schemas/auth_models.py +167 -0
- squirrels/_schemas/query_param_models.py +75 -0
- squirrels/{_api_response_models.py → _schemas/response_models.py} +126 -47
- squirrels/_seeds.py +35 -16
- squirrels/_sources.py +110 -0
- squirrels/_utils.py +248 -73
- squirrels/_version.py +1 -1
- squirrels/arguments.py +7 -0
- squirrels/auth.py +4 -0
- squirrels/connections.py +3 -0
- squirrels/dashboards.py +2 -81
- squirrels/data_sources.py +14 -631
- squirrels/parameter_options.py +13 -348
- squirrels/parameters.py +14 -1266
- squirrels/types.py +16 -0
- squirrels-0.5.0.dist-info/METADATA +113 -0
- squirrels-0.5.0.dist-info/RECORD +97 -0
- {squirrels-0.4.1.dist-info → squirrels-0.5.0.dist-info}/WHEEL +1 -1
- squirrels-0.5.0.dist-info/entry_points.txt +3 -0
- {squirrels-0.4.1.dist-info → squirrels-0.5.0.dist-info/licenses}/LICENSE +1 -1
- squirrels/_authenticator.py +0 -85
- squirrels/_dashboards_io.py +0 -61
- squirrels/_environcfg.py +0 -84
- squirrels/arguments/init_time_args.py +0 -40
- squirrels/arguments/run_time_args.py +0 -208
- squirrels/package_data/assets/favicon.ico +0 -0
- squirrels/package_data/assets/index.css +0 -1
- squirrels/package_data/assets/index.js +0 -58
- squirrels/package_data/base_project/assets/expenses.db +0 -0
- squirrels/package_data/base_project/connections.yml +0 -7
- squirrels/package_data/base_project/dashboards/dashboard_example.py +0 -32
- squirrels/package_data/base_project/dashboards.yml +0 -10
- squirrels/package_data/base_project/env.yml +0 -29
- squirrels/package_data/base_project/models/dbviews/dbview_example.py +0 -47
- squirrels/package_data/base_project/models/dbviews/dbview_example.sql +0 -22
- squirrels/package_data/base_project/models/federates/federate_example.py +0 -21
- squirrels/package_data/base_project/models/federates/federate_example.sql +0 -3
- squirrels/package_data/base_project/pyconfigs/auth.py +0 -45
- squirrels/package_data/base_project/pyconfigs/connections.py +0 -19
- squirrels/package_data/base_project/pyconfigs/parameters.py +0 -95
- squirrels/package_data/base_project/seeds/seed_subcategories.csv +0 -15
- squirrels/package_data/base_project/squirrels.yml.j2 +0 -94
- squirrels/package_data/templates/index.html +0 -18
- squirrels/project.py +0 -378
- squirrels/user_base.py +0 -55
- squirrels-0.4.1.dist-info/METADATA +0 -117
- squirrels-0.4.1.dist-info/RECORD +0 -60
- squirrels-0.4.1.dist-info/entry_points.txt +0 -4
- /squirrels/{package_data → _package_data}/base_project/assets/weather.db +0 -0
- /squirrels/{package_data → _package_data}/base_project/seeds/seed_categories.csv +0 -0
- /squirrels/{package_data → _package_data}/base_project/tmp/.gitignore +0 -0
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Data management routes for build and query models
|
|
3
|
+
"""
|
|
4
|
+
from typing import Any
|
|
5
|
+
from fastapi import FastAPI, Depends, Request, Response, status
|
|
6
|
+
from fastapi.responses import JSONResponse
|
|
7
|
+
from fastapi.security import HTTPBearer
|
|
8
|
+
from dataclasses import asdict
|
|
9
|
+
from cachetools import TTLCache
|
|
10
|
+
import time
|
|
11
|
+
|
|
12
|
+
from .. import _constants as c, _utils as u
|
|
13
|
+
from .._schemas import response_models as rm
|
|
14
|
+
from .._exceptions import InvalidInputError
|
|
15
|
+
from .._schemas.auth_models import AbstractUser
|
|
16
|
+
from .._manifest import PermissionScope
|
|
17
|
+
from .._dataset_types import DatasetResult
|
|
18
|
+
from .._schemas.query_param_models import get_query_models_for_querying_models, get_query_models_for_compiled_models
|
|
19
|
+
from .base import RouteBase
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class DataManagementRoutes(RouteBase):
    """
    Data management routes for build and query operations.

    Registers the endpoints that build the Virtual Data Lake (VDL), run a SQL query
    against the data models, and return the compiled definition of a model. Every
    operation here requires access to the PRIVATE permission scope.
    """

    def __init__(self, get_bearer_token: HTTPBearer, project, no_cache: bool = False):
        """
        Initialize the routes and create the cache used for query results.

        The arguments are forwarded unchanged to RouteBase.__init__.
        """
        super().__init__(get_bearer_token, project, no_cache)

        # This is a dedicated cache for query-models results. It is NOT the same object as the
        # dataset results cache; it only reuses the same size / TTL settings.
        cache_size = int(self.env_vars.get(c.SQRL_DATASETS_CACHE_SIZE, 128))
        cache_ttl_minutes = int(self.env_vars.get(c.SQRL_DATASETS_CACHE_TTL_MINUTES, 60))
        self.query_models_cache = TTLCache(maxsize=cache_size, ttl=cache_ttl_minutes*60)  # TTLCache takes seconds

    def _filter_manifest_configurables(self, configurables: tuple[tuple[str, str], ...]) -> dict[str, str]:
        """Return the configurables as a dict, keeping only names defined in the manifest"""
        allowed = self.manifest_cfg.configurables
        return {k: v for k, v in dict(configurables).items() if k in allowed}

    async def _query_models_helper(
        self, sql_query: str, user: AbstractUser, selections: tuple[tuple[str, Any], ...], configurables: tuple[tuple[str, str], ...]
    ) -> DatasetResult:
        """
        Run the SQL query against the project's models (no caching).

        Selections and configurables arrive as tuples of pairs and are converted back to
        dicts before being handed to the project.
        """
        cfg_filtered = self._filter_manifest_configurables(configurables)
        return await self.project.query_models(sql_query, user=user, selections=dict(selections), configurables=cfg_filtered)

    async def _query_models_cachable(
        self, sql_query: str, user: AbstractUser, selections: tuple[tuple[str, Any], ...], configurables: tuple[tuple[str, str], ...]
    ) -> DatasetResult:
        """Cachable version of the query models helper (goes through self.query_models_cache)"""
        return await self.do_cachable_action(self.query_models_cache, self._query_models_helper, sql_query, user, selections, configurables)

    async def _query_models_definition(
        self, user: AbstractUser, all_request_params: dict, params: dict, *, headers: dict[str, str]
    ) -> rm.DatasetResultModel:
        """
        Validate the request, run the SQL query, and shape the result for the response.

        Arguments:
            user: The user making the request
            all_request_params: The raw query parameters (GET) or JSON payload (POST)
            params: The parsed parameters from the query model
            headers: The request headers

        Raises:
            InvalidInputError: 403 if the user cannot access the PRIVATE scope,
                400 if no SQL query (or only a blank one) was provided
        """
        self._validate_request_params(all_request_params, params, headers)

        if not self.authenticator.can_user_access_scope(user, PermissionScope.PRIVATE):
            raise InvalidInputError(403, "unauthorized_access_to_query_models", f"User '{user}' does not have permission to query data models")

        sql_query = params.get("x_sql_query")
        # A blank query can never be executed, so treat it the same as a missing one
        if sql_query is None or not str(sql_query).strip():
            raise InvalidInputError(400, "sql_query_required", "SQL query must be provided")

        query_models_function = self._query_models_helper if self.no_cache else self._query_models_cachable

        # These keys are excluded when building the immutable selections
        # (presumably so they do not become part of the cache key - TODO confirm in RouteBase)
        uncached_keys = {"x_verify_params", "x_sql_query", "x_orientation", "x_limit", "x_offset"}
        selections = self.get_selections_as_immutable(params, uncached_keys)
        configurables = self.get_configurables_from_headers(headers)
        result = await query_models_function(sql_query, user, selections, configurables)

        # The "x-orientation" header takes precedence over the "x_orientation" parameter
        orientation_header = headers.get("x-orientation")
        orientation = str(orientation_header).lower() if orientation_header is not None else params.get("x_orientation", "records")
        limit = params.get("x_limit", 1000)
        offset = params.get("x_offset", 0)
        return rm.DatasetResultModel(**result.to_json(orientation, limit, offset))

    async def _get_compiled_model_definition(
        self, model_name: str, user: AbstractUser, all_request_params: dict, params: dict, *, headers: dict[str, str]
    ) -> rm.CompiledQueryModel:
        """
        Validate the request and return the compiled query of the given model.

        Raises:
            InvalidInputError: 403 if the user cannot access the PRIVATE scope
        """
        normalized_model_name = u.normalize_name(model_name)
        self._validate_request_params(all_request_params, params, headers)

        # Internal users only
        if not self.authenticator.can_user_access_scope(user, PermissionScope.PRIVATE):
            raise InvalidInputError(403, "unauthorized_access_to_compile_model", f"User '{user}' does not have permission to fetch compiled SQL")

        selections = self.get_selections_as_immutable(params, uncached_keys={"x_verify_params"})
        configurables = self.get_configurables_from_headers(headers)
        cfg_filtered = self._filter_manifest_configurables(configurables)
        return await self.project.get_compiled_model_query(normalized_model_name, user=user, selections=dict(selections), configurables=cfg_filtered)

    def setup_routes(self, app: FastAPI, project_metadata_path: str, param_fields: dict) -> None:
        """
        Setup data management routes.

        Arguments:
            app: The FastAPI application to register the endpoints on
            project_metadata_path: The path prefix that the endpoint paths are appended to
            param_fields: Passed to the query model factories to build the request models
        """

        # Build project endpoint
        build_path = project_metadata_path + '/build'

        @app.post(build_path, tags=["Data Management"], summary="Build or update the Virtual Data Lake (VDL) for the project")
        async def build(user=Depends(self.get_current_user)): # type: ignore
            if not self.authenticator.can_user_access_scope(user, PermissionScope.PRIVATE):
                raise InvalidInputError(403, "unauthorized_access_to_build_model", f"User '{user}' does not have permission to build the virtual data lake (VDL)")
            await self.project.build()
            return Response(status_code=status.HTTP_200_OK)

        # Query result endpoints
        query_models_path = project_metadata_path + '/query-result'
        QueryModelForQueryModels, QueryModelForPostQueryModels = get_query_models_for_querying_models(param_fields)

        @app.get(query_models_path, tags=["Data Management"], response_class=JSONResponse)
        async def query_models(
            request: Request, params: QueryModelForQueryModels, user=Depends(self.get_current_user) # type: ignore
        ) -> rm.DatasetResultModel:
            start = time.time()
            result = await self._query_models_definition(user, dict(request.query_params), asdict(params), headers=dict(request.headers))
            self.log_activity_time("GET REQUEST for QUERY MODELS", start, request)
            return result

        @app.post(query_models_path, tags=["Data Management"], response_class=JSONResponse)
        async def query_models_with_post(
            request: Request, params: QueryModelForPostQueryModels, user=Depends(self.get_current_user) # type: ignore
        ) -> rm.DatasetResultModel:
            start = time.time()
            payload: dict = await request.json()
            result = await self._query_models_definition(user, payload, params.model_dump(), headers=dict(request.headers))
            self.log_activity_time("POST REQUEST for QUERY MODELS", start, request)
            return result

        # Compiled models endpoints - TODO: remove duplication
        compiled_models_path = project_metadata_path + '/compiled-models/{model_name}'
        QueryModelForGetCompiled, QueryModelForPostCompiled = get_query_models_for_compiled_models(param_fields)

        @app.get(compiled_models_path, tags=["Data Management"], response_class=JSONResponse, summary="Get compiled definition for a model")
        async def get_compiled_model(
            request: Request, model_name: str, params: QueryModelForGetCompiled, user=Depends(self.get_current_user)
        ) -> rm.CompiledQueryModel:
            start = time.time()
            result = await self._get_compiled_model_definition(model_name, user, dict(request.query_params), asdict(params), headers=dict(request.headers))
            self.log_activity_time("GET REQUEST for GET COMPILED MODEL", start, request)
            return result

        @app.post(compiled_models_path, tags=["Data Management"], response_class=JSONResponse, summary="Get compiled definition for a model")
        async def get_compiled_model_with_post(
            request: Request, model_name: str, params: QueryModelForPostCompiled, user=Depends(self.get_current_user)
        ) -> rm.CompiledQueryModel:
            start = time.time()
            payload: dict = await request.json()
            result = await self._get_compiled_model_definition(model_name, user, payload, params.model_dump(), headers=dict(request.headers))
            self.log_activity_time("POST REQUEST for GET COMPILED MODEL", start, request)
            return result
|
|
145
|
+
|
|
@@ -0,0 +1,257 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Dataset routes for parameters and results
|
|
3
|
+
"""
|
|
4
|
+
from typing import Callable, Coroutine, Any
|
|
5
|
+
from pydantic import Field
|
|
6
|
+
from fastapi import FastAPI, Depends, Request
|
|
7
|
+
from fastapi.responses import JSONResponse
|
|
8
|
+
from fastapi.security import HTTPBearer
|
|
9
|
+
|
|
10
|
+
from mcp.server.fastmcp import FastMCP, Context
|
|
11
|
+
from dataclasses import asdict
|
|
12
|
+
from cachetools import TTLCache
|
|
13
|
+
from textwrap import dedent
|
|
14
|
+
|
|
15
|
+
import time, json
|
|
16
|
+
|
|
17
|
+
from .. import _constants as c, _utils as u
|
|
18
|
+
from .._schemas import response_models as rm
|
|
19
|
+
from .._exceptions import ConfigurationError, InvalidInputError
|
|
20
|
+
from .._dataset_types import DatasetResult
|
|
21
|
+
from .._schemas.query_param_models import get_query_models_for_parameters, get_query_models_for_dataset
|
|
22
|
+
from .._schemas.auth_models import AbstractUser
|
|
23
|
+
from .base import RouteBase
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class DatasetRoutes(RouteBase):
    """
    Dataset parameter and result routes.

    Registers, for every dataset in the manifest, the REST endpoints for its parameters
    and its results, plus two MCP tools that expose the same operations.
    """

    def __init__(self, get_bearer_token: HTTPBearer, project, no_cache: bool = False):
        """
        Initialize the routes, the dataset results cache, and the row limit for MCP tools.

        The arguments are forwarded unchanged to RouteBase.__init__.
        """
        super().__init__(get_bearer_token, project, no_cache)

        # Setup caches (the TTL setting is in minutes, TTLCache takes seconds)
        dataset_results_cache_size = int(self.env_vars.get(c.SQRL_DATASETS_CACHE_SIZE, 128))
        dataset_results_cache_ttl = int(self.env_vars.get(c.SQRL_DATASETS_CACHE_TTL_MINUTES, 60))
        self.dataset_results_cache = TTLCache(maxsize=dataset_results_cache_size, ttl=dataset_results_cache_ttl*60)

        # Setup max rows for AI (upper bound on "limit" in the dataset results MCP tool)
        self.max_rows_for_ai = int(self.env_vars.get(c.SQRL_DATASETS_MAX_ROWS_FOR_AI, 100))

    async def _get_dataset_results_helper(
        self, dataset: str, user: AbstractUser, selections: tuple[tuple[str, Any], ...], configurables: tuple[tuple[str, str], ...]
    ) -> DatasetResult:
        """Get the dataset results from the project (no caching)"""
        # Only pass configurables that are defined in manifest
        cfg_filtered = {k: v for k, v in dict(configurables).items() if k in self.manifest_cfg.configurables}
        return await self.project.dataset(dataset, user=user, selections=dict(selections), configurables=cfg_filtered)

    async def _get_dataset_results_cachable(
        self, dataset: str, user: AbstractUser, selections: tuple[tuple[str, Any], ...], configurables: tuple[tuple[str, str], ...]
    ) -> DatasetResult:
        """Cachable version of the dataset results helper (goes through self.dataset_results_cache)"""
        return await self.do_cachable_action(self.dataset_results_cache, self._get_dataset_results_helper, dataset, user, selections, configurables)

    async def _get_dataset_results_definition(
        self, dataset_name: str, user: AbstractUser, all_request_params: dict, params: dict, headers: dict[str, str]
    ) -> rm.DatasetResultModel:
        """
        Validate the request, get the dataset result, and shape it for the response.

        Arguments:
            dataset_name: The name of the dataset to get results for
            user: The user making the request
            all_request_params: The raw query parameters (GET) or JSON payload (POST / MCP tool)
            params: The parsed parameters from the query model
            headers: The request headers

        Raises:
            InvalidInputError: 400 if the optional "x_sql_query" fails to run on the result
        """
        self._validate_request_params(all_request_params, params, headers)

        get_dataset_function = self._get_dataset_results_helper if self.no_cache else self._get_dataset_results_cachable

        # These keys are excluded when building the immutable selections
        # (presumably so they do not become part of the cache key - TODO confirm in RouteBase)
        uncached_keys = {"x_verify_params", "x_orientation", "x_sql_query", "x_limit", "x_offset"}
        selections = self.get_selections_as_immutable(params, uncached_keys)

        # Configurables from the headers are only honored for users with elevated privileges
        user_has_elevated_privileges = u.user_has_elevated_privileges(user.access_level, self.project._elevated_access_level)
        configurables = self.get_configurables_from_headers(headers) if user_has_elevated_privileges else tuple()
        result = await get_dataset_function(dataset_name, user, selections, configurables)

        # Apply optional final SQL transformation before select/limit/offset
        sql_query = params.get("x_sql_query")
        if sql_query:
            try:
                transformed = u.run_sql_on_dataframes(sql_query, {"result": result.df.lazy()})
            except Exception as e:
                raise InvalidInputError(400, "invalid_sql_query", "Failed to run provided SQL on the dataset result") from e

            # Renumber the rows of the transformed result starting from 1
            transformed = transformed.drop("_row_num", strict=False).with_row_index("_row_num", offset=1)
            # Build a new DatasetResult rather than modifying the (possibly cached) one
            result = DatasetResult(target_model_config=result.target_model_config, df=transformed)

        # The "x-orientation" header takes precedence over the "x_orientation" parameter
        orientation_header = headers.get("x-orientation")
        orientation = str(orientation_header).lower() if orientation_header is not None else params.get("x_orientation", "records")
        limit = params.get("x_limit", 1000)
        offset = params.get("x_offset", 0)
        return rm.DatasetResultModel(**result.to_json(orientation, limit, offset))

    def setup_routes(
        self, app: FastAPI, mcp: FastMCP, project_metadata_path: str, project_name: str, project_label: str,
        param_fields: dict, get_parameters_definition: Callable[..., Coroutine[Any, Any, rm.ParametersModel]]
    ) -> None:
        """
        Setup dataset routes.

        Arguments:
            app: The FastAPI application to register the REST endpoints on
            mcp: The FastMCP server to register the tools on
            project_metadata_path: The path prefix that the endpoint paths are appended to
            project_name: The project name, used in the MCP tool names and descriptions
            project_label: The project label, used in the MCP tool titles
            param_fields: The available parameters keyed by name, used to validate each dataset's
                parameters and to build the request models
            get_parameters_definition: Coroutine function that produces the parameters response

        Raises:
            ConfigurationError: If a dataset references a parameter that is not in param_fields
        """

        dataset_results_path = project_metadata_path + '/dataset/{dataset}'
        dataset_parameters_path = dataset_results_path + '/parameters'

        def validate_parameters_list(parameters: list[str] | None, entity_type: str, dataset_name: str) -> None:
            if parameters is None:
                return
            for param in parameters:
                if param not in param_fields:
                    all_params = list(param_fields.keys())
                    raise ConfigurationError(
                        f"{entity_type} '{dataset_name}' use parameter '{param}' which doesn't exist. Available parameters are:"
                        f"\n  {all_params}"
                    )

        async def get_dataset_parameters_updates(dataset_name: str, user: AbstractUser, all_request_params: dict, params: dict, headers: dict[str, str]):
            # The MCP tool passes a caller-supplied dataset name, so report an unknown dataset
            # with a descriptive error instead of letting a bare KeyError escape
            if dataset_name not in self.manifest_cfg.datasets:
                raise InvalidInputError(404, "dataset_not_found", f"Dataset '{dataset_name}' does not exist")

            cfg = self.manifest_cfg.datasets[dataset_name]
            result = await get_parameters_definition(
                cfg.parameters, "dataset", dataset_name, cfg.scope, user, all_request_params, params, headers=headers
            )
            return result

        # Dataset parameters and results APIs
        # NOTE: the handlers below read the dataset name from the request path instead of using
        # the loop variables, since the same handler definitions are re-created on every iteration
        for dataset_name, dataset_config in self.manifest_cfg.datasets.items():
            dataset_name_for_api = u.normalize_name_for_api(dataset_name)
            curr_parameters_path = dataset_parameters_path.format(dataset=dataset_name_for_api)
            curr_results_path = dataset_results_path.format(dataset=dataset_name_for_api)

            validate_parameters_list(dataset_config.parameters, "Dataset", dataset_name)

            QueryModelForGetParams, QueryModelForPostParams = get_query_models_for_parameters(dataset_config.parameters, param_fields)
            QueryModelForGetDataset, QueryModelForPostDataset = get_query_models_for_dataset(dataset_config.parameters, param_fields)

            @app.get(curr_parameters_path, tags=[f"Dataset '{dataset_name}'"], description=self._parameters_description, response_class=JSONResponse)
            async def get_dataset_parameters(
                request: Request, params: QueryModelForGetParams, user=Depends(self.get_current_user) # type: ignore
            ) -> rm.ParametersModel:
                start = time.time()
                curr_dataset_name = self.get_name_from_path_section(request, -2)
                result = await get_dataset_parameters_updates(curr_dataset_name, user, dict(request.query_params), asdict(params), dict(request.headers))
                self.log_activity_time("GET REQUEST for PARAMETERS", start, request)
                return result

            @app.post(curr_parameters_path, tags=[f"Dataset '{dataset_name}'"], description=self._parameters_description, response_class=JSONResponse)
            async def get_dataset_parameters_with_post(
                request: Request, params: QueryModelForPostParams, user=Depends(self.get_current_user) # type: ignore
            ) -> rm.ParametersModel:
                start = time.time()
                curr_dataset_name = self.get_name_from_path_section(request, -2)
                payload: dict = await request.json()
                result = await get_dataset_parameters_updates(curr_dataset_name, user, payload, params.model_dump(), dict(request.headers))
                self.log_activity_time("POST REQUEST for PARAMETERS", start, request)
                return result

            @app.get(curr_results_path, tags=[f"Dataset '{dataset_name}'"], description=dataset_config.description, response_class=JSONResponse)
            async def get_dataset_results(
                request: Request, params: QueryModelForGetDataset, user=Depends(self.get_current_user) # type: ignore
            ) -> rm.DatasetResultModel:
                start = time.time()
                curr_dataset_name = self.get_name_from_path_section(request, -1)
                result = await self._get_dataset_results_definition(
                    curr_dataset_name, user, dict(request.query_params), asdict(params), headers=dict(request.headers)
                )
                self.log_activity_time("GET REQUEST for DATASET RESULTS", start, request)
                return result

            @app.post(curr_results_path, tags=[f"Dataset '{dataset_name}'"], description=dataset_config.description, response_class=JSONResponse)
            async def get_dataset_results_with_post(
                request: Request, params: QueryModelForPostDataset, user=Depends(self.get_current_user) # type: ignore
            ) -> rm.DatasetResultModel:
                start = time.time()
                curr_dataset_name = self.get_name_from_path_section(request, -1)
                payload: dict = await request.json()
                result = await self._get_dataset_results_definition(
                    curr_dataset_name, user, payload, params.model_dump(), headers=dict(request.headers)
                )
                self.log_activity_time("POST REQUEST for DATASET RESULTS", start, request)
                return result

        # Setup MCP tools (registered once per project; the dataset is a tool argument)

        @mcp.tool(
            name=f"get_dataset_parameters_from_{project_name}",
            title=f"Get Dataset Parameters Updates (Project: {project_label})",
            description=dedent(f"""
                Use this tool to get updates for dataset parameters in the Squirrels project "{project_name}" when a selection is to be made on a parameter with "trigger_refresh" as true.

                For example, suppose there are two parameters, "country" and "city", and the user selects "United States" for "country". If "country" has the "trigger_refresh" field as true, then this tool should be called to get the updates for other parameters such as "city".

                Do not use this tool on parameters whose "trigger_refresh" field is false!
            """).strip()
        )
        async def get_dataset_parameters_tool(
            ctx: Context,
            dataset: str = Field(description="The name of the dataset whose parameters the trigger parameter will update"),
            parameter_name: str = Field(description="The name of the parameter triggering the refresh"),
            selected_ids: list[str] = Field(description="The ID(s) of the selected option(s) for the parameter"),
        ) -> rm.ParametersModel:
            headers = self.get_headers_from_tool_ctx(ctx)
            user = self.get_user_from_tool_headers(headers)
            dataset_name = u.normalize_name(dataset)
            payload = {
                "x_parent_param": parameter_name,
                parameter_name: selected_ids
            }
            return await get_dataset_parameters_updates(dataset_name, user, payload, payload, headers)

        @mcp.tool(
            name=f"get_dataset_results_from_{project_name}",
            title=f"Get Dataset Results (Project: {project_label})",
            description=dedent(f"""
                Use this tool to get the dataset results as a JSON object for a dataset in the Squirrels project "{project_name}".
                - Use the "offset" and "limit" arguments to limit the number of rows you require
                - The "limit" argument controls the number of rows returned. The maximum allowed value is {self.max_rows_for_ai}. If the 'total_num_rows' field in the response is greater than {self.max_rows_for_ai}, let the user know that only {self.max_rows_for_ai} rows are shown and clarify if they would like to see more.
            """).strip()
        )
        async def get_dataset_results_tool(
            ctx: Context,
            dataset: str = Field(description="The name of the dataset to get results for"),
            parameters: str = Field(description=dedent("""
                A JSON object (as string) containing key-value pairs for parameter name and selected value. The selected value to provide depends on the parameter widget type:
                - For single select, use a string for the ID of the selected value
                - For multi select, use an array of strings for the IDs of the selected values
                - For date, use a string like "YYYY-MM-DD"
                - For date ranges, use array of strings like ["YYYY-MM-DD","YYYY-MM-DD"]
                - For number, use a number like 1
                - For number ranges, use array of numbers like [1,100]
                - For text, use a string for the text value
                - Complex objects are NOT supported
            """).strip()),
            sql_query: str | None = Field(None, description=dedent("""
                A custom DuckDB SQL query to execute on the final dataset result.
                - Use table name 'result' to reference the dataset result.
                - Use this to apply transformations to the dataset result if needed (such as filtering, sorting, or selecting columns).
                - If not provided, the dataset result is returned as is.
            """).strip()),
            offset: int = Field(0, description="The number of rows to skip from first row. Applied after final SQL. Default is 0."),
            limit: int = Field(self.max_rows_for_ai, description=f"The maximum number of rows to return. Applied after final SQL. Default is {self.max_rows_for_ai}. Maximum allowed value is {self.max_rows_for_ai}."),
        ) -> rm.DatasetResultModel:
            if limit > self.max_rows_for_ai:
                raise ValueError(f"The maximum number of rows to return is {self.max_rows_for_ai}.")

            headers = self.get_headers_from_tool_ctx(ctx)
            user = self.get_user_from_tool_headers(headers)
            dataset_name = u.normalize_name(dataset)

            try:
                params = json.loads(parameters)
            except json.JSONDecodeError:
                params = None # error handled below

            # Valid JSON that is not an object (e.g. a list or a number) is rejected as well
            if not isinstance(params, dict):
                raise InvalidInputError(400, "invalid_parameters", "The 'parameters' argument must be a JSON object.")

            # The dedicated tool arguments always override same-named keys in the JSON object
            params.update({
                "x_sql_query": sql_query,
                "x_offset": offset,
                "x_limit": limit
            })

            # Set default orientation as rows if not provided
            headers.setdefault("x-orientation", "rows")

            result = await self._get_dataset_results_definition(dataset_name, user, params, params, headers)
            return result
|
|
257
|
+
|