squirrels 0.4.1__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of squirrels might be problematic. Click here for more details.

Files changed (125) hide show
  1. dateutils/__init__.py +6 -0
  2. dateutils/_enums.py +25 -0
  3. squirrels/dateutils.py → dateutils/_implementation.py +58 -111
  4. dateutils/types.py +6 -0
  5. squirrels/__init__.py +13 -11
  6. squirrels/_api_routes/__init__.py +5 -0
  7. squirrels/_api_routes/auth.py +271 -0
  8. squirrels/_api_routes/base.py +165 -0
  9. squirrels/_api_routes/dashboards.py +150 -0
  10. squirrels/_api_routes/data_management.py +145 -0
  11. squirrels/_api_routes/datasets.py +257 -0
  12. squirrels/_api_routes/oauth2.py +298 -0
  13. squirrels/_api_routes/project.py +252 -0
  14. squirrels/_api_server.py +256 -450
  15. squirrels/_arguments/__init__.py +0 -0
  16. squirrels/_arguments/init_time_args.py +108 -0
  17. squirrels/_arguments/run_time_args.py +147 -0
  18. squirrels/_auth.py +960 -0
  19. squirrels/_command_line.py +126 -45
  20. squirrels/_compile_prompts.py +147 -0
  21. squirrels/_connection_set.py +48 -26
  22. squirrels/_constants.py +68 -38
  23. squirrels/_dashboards.py +160 -0
  24. squirrels/_data_sources.py +570 -0
  25. squirrels/_dataset_types.py +84 -0
  26. squirrels/_exceptions.py +29 -0
  27. squirrels/_initializer.py +177 -80
  28. squirrels/_logging.py +115 -0
  29. squirrels/_manifest.py +208 -79
  30. squirrels/_model_builder.py +69 -0
  31. squirrels/_model_configs.py +74 -0
  32. squirrels/_model_queries.py +52 -0
  33. squirrels/_models.py +926 -367
  34. squirrels/_package_data/base_project/.env +42 -0
  35. squirrels/_package_data/base_project/.env.example +42 -0
  36. squirrels/_package_data/base_project/assets/expenses.db +0 -0
  37. squirrels/_package_data/base_project/connections.yml +16 -0
  38. squirrels/_package_data/base_project/dashboards/dashboard_example.py +34 -0
  39. squirrels/_package_data/base_project/dashboards/dashboard_example.yml +22 -0
  40. squirrels/{package_data → _package_data}/base_project/docker/.dockerignore +5 -2
  41. squirrels/{package_data → _package_data}/base_project/docker/Dockerfile +3 -3
  42. squirrels/{package_data → _package_data}/base_project/docker/compose.yml +1 -1
  43. squirrels/_package_data/base_project/duckdb_init.sql +10 -0
  44. squirrels/{package_data/base_project/.gitignore → _package_data/base_project/gitignore} +3 -2
  45. squirrels/_package_data/base_project/macros/macros_example.sql +17 -0
  46. squirrels/_package_data/base_project/models/builds/build_example.py +26 -0
  47. squirrels/_package_data/base_project/models/builds/build_example.sql +16 -0
  48. squirrels/_package_data/base_project/models/builds/build_example.yml +57 -0
  49. squirrels/_package_data/base_project/models/dbviews/dbview_example.sql +12 -0
  50. squirrels/_package_data/base_project/models/dbviews/dbview_example.yml +26 -0
  51. squirrels/_package_data/base_project/models/federates/federate_example.py +37 -0
  52. squirrels/_package_data/base_project/models/federates/federate_example.sql +19 -0
  53. squirrels/_package_data/base_project/models/federates/federate_example.yml +65 -0
  54. squirrels/_package_data/base_project/models/sources.yml +38 -0
  55. squirrels/{package_data → _package_data}/base_project/parameters.yml +56 -40
  56. squirrels/_package_data/base_project/pyconfigs/connections.py +14 -0
  57. squirrels/{package_data → _package_data}/base_project/pyconfigs/context.py +21 -40
  58. squirrels/_package_data/base_project/pyconfigs/parameters.py +141 -0
  59. squirrels/_package_data/base_project/pyconfigs/user.py +44 -0
  60. squirrels/_package_data/base_project/seeds/seed_categories.yml +15 -0
  61. squirrels/_package_data/base_project/seeds/seed_subcategories.csv +15 -0
  62. squirrels/_package_data/base_project/seeds/seed_subcategories.yml +21 -0
  63. squirrels/_package_data/base_project/squirrels.yml.j2 +61 -0
  64. squirrels/_package_data/templates/dataset_results.html +112 -0
  65. squirrels/_package_data/templates/oauth_login.html +271 -0
  66. squirrels/_package_data/templates/squirrels_studio.html +20 -0
  67. squirrels/_package_loader.py +8 -4
  68. squirrels/_parameter_configs.py +104 -103
  69. squirrels/_parameter_options.py +348 -0
  70. squirrels/_parameter_sets.py +57 -47
  71. squirrels/_parameters.py +1664 -0
  72. squirrels/_project.py +721 -0
  73. squirrels/_py_module.py +7 -5
  74. squirrels/_schemas/__init__.py +0 -0
  75. squirrels/_schemas/auth_models.py +167 -0
  76. squirrels/_schemas/query_param_models.py +75 -0
  77. squirrels/{_api_response_models.py → _schemas/response_models.py} +126 -47
  78. squirrels/_seeds.py +35 -16
  79. squirrels/_sources.py +110 -0
  80. squirrels/_utils.py +248 -73
  81. squirrels/_version.py +1 -1
  82. squirrels/arguments.py +7 -0
  83. squirrels/auth.py +4 -0
  84. squirrels/connections.py +3 -0
  85. squirrels/dashboards.py +2 -81
  86. squirrels/data_sources.py +14 -631
  87. squirrels/parameter_options.py +13 -348
  88. squirrels/parameters.py +14 -1266
  89. squirrels/types.py +16 -0
  90. squirrels-0.5.0.dist-info/METADATA +113 -0
  91. squirrels-0.5.0.dist-info/RECORD +97 -0
  92. {squirrels-0.4.1.dist-info → squirrels-0.5.0.dist-info}/WHEEL +1 -1
  93. squirrels-0.5.0.dist-info/entry_points.txt +3 -0
  94. {squirrels-0.4.1.dist-info → squirrels-0.5.0.dist-info/licenses}/LICENSE +1 -1
  95. squirrels/_authenticator.py +0 -85
  96. squirrels/_dashboards_io.py +0 -61
  97. squirrels/_environcfg.py +0 -84
  98. squirrels/arguments/init_time_args.py +0 -40
  99. squirrels/arguments/run_time_args.py +0 -208
  100. squirrels/package_data/assets/favicon.ico +0 -0
  101. squirrels/package_data/assets/index.css +0 -1
  102. squirrels/package_data/assets/index.js +0 -58
  103. squirrels/package_data/base_project/assets/expenses.db +0 -0
  104. squirrels/package_data/base_project/connections.yml +0 -7
  105. squirrels/package_data/base_project/dashboards/dashboard_example.py +0 -32
  106. squirrels/package_data/base_project/dashboards.yml +0 -10
  107. squirrels/package_data/base_project/env.yml +0 -29
  108. squirrels/package_data/base_project/models/dbviews/dbview_example.py +0 -47
  109. squirrels/package_data/base_project/models/dbviews/dbview_example.sql +0 -22
  110. squirrels/package_data/base_project/models/federates/federate_example.py +0 -21
  111. squirrels/package_data/base_project/models/federates/federate_example.sql +0 -3
  112. squirrels/package_data/base_project/pyconfigs/auth.py +0 -45
  113. squirrels/package_data/base_project/pyconfigs/connections.py +0 -19
  114. squirrels/package_data/base_project/pyconfigs/parameters.py +0 -95
  115. squirrels/package_data/base_project/seeds/seed_subcategories.csv +0 -15
  116. squirrels/package_data/base_project/squirrels.yml.j2 +0 -94
  117. squirrels/package_data/templates/index.html +0 -18
  118. squirrels/project.py +0 -378
  119. squirrels/user_base.py +0 -55
  120. squirrels-0.4.1.dist-info/METADATA +0 -117
  121. squirrels-0.4.1.dist-info/RECORD +0 -60
  122. squirrels-0.4.1.dist-info/entry_points.txt +0 -4
  123. /squirrels/{package_data → _package_data}/base_project/assets/weather.db +0 -0
  124. /squirrels/{package_data → _package_data}/base_project/seeds/seed_categories.csv +0 -0
  125. /squirrels/{package_data → _package_data}/base_project/tmp/.gitignore +0 -0
@@ -0,0 +1,145 @@
1
+ """
2
+ Data management routes for build and query models
3
+ """
4
+ from typing import Any
5
+ from fastapi import FastAPI, Depends, Request, Response, status
6
+ from fastapi.responses import JSONResponse
7
+ from fastapi.security import HTTPBearer
8
+ from dataclasses import asdict
9
+ from cachetools import TTLCache
10
+ import time
11
+
12
+ from .. import _constants as c, _utils as u
13
+ from .._schemas import response_models as rm
14
+ from .._exceptions import InvalidInputError
15
+ from .._schemas.auth_models import AbstractUser
16
+ from .._manifest import PermissionScope
17
+ from .._dataset_types import DatasetResult
18
+ from .._schemas.query_param_models import get_query_models_for_querying_models, get_query_models_for_compiled_models
19
+ from .base import RouteBase
20
+
21
+
22
class DataManagementRoutes(RouteBase):
    """Routes that build the Virtual Data Lake (VDL) and that query or compile data models."""

    def __init__(self, get_bearer_token: HTTPBearer, project, no_cache: bool = False):
        """Initialise the shared route state and create the TTL cache for query-models results."""
        super().__init__(get_bearer_token, project, no_cache)

        # A dedicated TTLCache for query-models results. It is sized by the same
        # SQRL_DATASETS_CACHE_* settings (defaults: 128 entries, 60 minutes).
        max_entries = int(self.env_vars.get(c.SQRL_DATASETS_CACHE_SIZE, 128))
        ttl_minutes = int(self.env_vars.get(c.SQRL_DATASETS_CACHE_TTL_MINUTES, 60))
        self.query_models_cache = TTLCache(maxsize=max_entries, ttl=ttl_minutes * 60)

    async def _query_models_helper(
        self, sql_query: str, user: AbstractUser, selections: tuple[tuple[str, Any], ...], configurables: tuple[tuple[str, str], ...]
    ) -> DatasetResult:
        """Run ``sql_query`` against the project's models (no caching).

        ``selections`` and ``configurables`` arrive as tuples of pairs and are
        converted back to dicts here. Configurables whose name is not declared in
        the manifest are dropped before the project is queried.
        """
        cfg_filtered = {}
        for cfg_name, cfg_value in dict(configurables).items():
            if cfg_name in self.manifest_cfg.configurables:
                cfg_filtered[cfg_name] = cfg_value

        selections_dict = dict(selections)
        return await self.project.query_models(sql_query, user=user, selections=selections_dict, configurables=cfg_filtered)

    async def _query_models_cachable(
        self, sql_query: str, user: AbstractUser, selections: tuple[tuple[str, Any], ...], configurables: tuple[tuple[str, str], ...]
    ) -> DatasetResult:
        """Same as ``_query_models_helper`` but routed through ``do_cachable_action`` with ``query_models_cache``."""
        return await self.do_cachable_action(
            self.query_models_cache, self._query_models_helper, sql_query, user, selections, configurables
        )

    async def _query_models_definition(
        self, user: AbstractUser, all_request_params: dict, params: dict, *, headers: dict[str, str]
    ) -> rm.DatasetResultModel:
        """Validate the request, run the SQL query on the models, and shape the response.

        Raises:
            InvalidInputError: 403 when the user cannot access the PRIVATE scope,
                400 when no ``x_sql_query`` value was provided.
        """
        self._validate_request_params(all_request_params, params, headers)

        is_allowed = self.authenticator.can_user_access_scope(user, PermissionScope.PRIVATE)
        if not is_allowed:
            raise InvalidInputError(403, "unauthorized_access_to_query_models", f"User '{user}' does not have permission to query data models")

        sql_text = params.get("x_sql_query")
        if sql_text is None:
            raise InvalidInputError(400, "sql_query_required", "SQL query must be provided")

        if self.no_cache:
            run_query = self._query_models_helper
        else:
            run_query = self._query_models_cachable

        # These keys are handed to get_selections_as_immutable as "uncached" keys.
        keys_outside_cache = {"x_verify_params", "x_sql_query", "x_orientation", "x_limit", "x_offset"}
        selections = self.get_selections_as_immutable(params, keys_outside_cache)
        configurables = self.get_configurables_from_headers(headers)
        result = await run_query(sql_text, user, selections, configurables)

        # The "x-orientation" header takes precedence over the "x_orientation" parameter.
        header_value = headers.get("x-orientation")
        if header_value is not None:
            orientation = str(header_value).lower()
        else:
            orientation = params.get("x_orientation", "records")

        limit = params.get("x_limit", 1000)
        offset = params.get("x_offset", 0)
        result_json = result.to_json(orientation, limit, offset)
        return rm.DatasetResultModel(**result_json)

    async def _get_compiled_model_definition(
        self, model_name: str, user: AbstractUser, all_request_params: dict, params: dict, *, headers: dict[str, str]
    ) -> rm.CompiledQueryModel:
        """Validate the request and return the compiled query of the named model.

        Raises:
            InvalidInputError: 403 when the user cannot access the PRIVATE scope.
        """
        normalized_model_name = u.normalize_name(model_name)
        self._validate_request_params(all_request_params, params, headers)

        # Internal users only
        is_allowed = self.authenticator.can_user_access_scope(user, PermissionScope.PRIVATE)
        if not is_allowed:
            raise InvalidInputError(403, "unauthorized_access_to_compile_model", f"User '{user}' does not have permission to fetch compiled SQL")

        selections = self.get_selections_as_immutable(params, uncached_keys={"x_verify_params"})
        configurables = self.get_configurables_from_headers(headers)

        # Only configurables declared in the manifest are forwarded.
        cfg_filtered = {}
        for cfg_name, cfg_value in dict(configurables).items():
            if cfg_name in self.manifest_cfg.configurables:
                cfg_filtered[cfg_name] = cfg_value

        return await self.project.get_compiled_model_query(
            normalized_model_name, user=user, selections=dict(selections), configurables=cfg_filtered
        )

    def setup_routes(self, app: FastAPI, project_metadata_path: str, param_fields: dict) -> None:
        """Register the build, query-result and compiled-models endpoints on ``app``."""

        # Build project endpoint
        build_route = project_metadata_path + '/build'

        @app.post(build_route, tags=["Data Management"], summary="Build or update the Virtual Data Lake (VDL) for the project")
        async def build(user=Depends(self.get_current_user)): # type: ignore
            is_allowed = self.authenticator.can_user_access_scope(user, PermissionScope.PRIVATE)
            if not is_allowed:
                raise InvalidInputError(403, "unauthorized_access_to_build_model", f"User '{user}' does not have permission to build the virtual data lake (VDL)")
            await self.project.build()
            return Response(status_code=status.HTTP_200_OK)

        # Query result endpoints
        query_result_route = project_metadata_path + '/query-result'
        QueryModelForQueryModels, QueryModelForPostQueryModels = get_query_models_for_querying_models(param_fields)

        @app.get(query_result_route, tags=["Data Management"], response_class=JSONResponse)
        async def query_models(
            request: Request, params: QueryModelForQueryModels, user=Depends(self.get_current_user) # type: ignore
        ) -> rm.DatasetResultModel:
            started_at = time.time()
            response = await self._query_models_definition(
                user, dict(request.query_params), asdict(params), headers=dict(request.headers)
            )
            self.log_activity_time("GET REQUEST for QUERY MODELS", started_at, request)
            return response

        @app.post(query_result_route, tags=["Data Management"], response_class=JSONResponse)
        async def query_models_with_post(
            request: Request, params: QueryModelForPostQueryModels, user=Depends(self.get_current_user) # type: ignore
        ) -> rm.DatasetResultModel:
            started_at = time.time()
            body: dict = await request.json()
            response = await self._query_models_definition(
                user, body, params.model_dump(), headers=dict(request.headers)
            )
            self.log_activity_time("POST REQUEST for QUERY MODELS", started_at, request)
            return response

        # Compiled models endpoints - TODO: remove duplication
        compiled_route = project_metadata_path + '/compiled-models/{model_name}'
        QueryModelForGetCompiled, QueryModelForPostCompiled = get_query_models_for_compiled_models(param_fields)

        @app.get(compiled_route, tags=["Data Management"], response_class=JSONResponse, summary="Get compiled definition for a model")
        async def get_compiled_model(
            request: Request, model_name: str, params: QueryModelForGetCompiled, user=Depends(self.get_current_user)
        ) -> rm.CompiledQueryModel:
            started_at = time.time()
            response = await self._get_compiled_model_definition(
                model_name, user, dict(request.query_params), asdict(params), headers=dict(request.headers)
            )
            self.log_activity_time("GET REQUEST for GET COMPILED MODEL", started_at, request)
            return response

        @app.post(compiled_route, tags=["Data Management"], response_class=JSONResponse, summary="Get compiled definition for a model")
        async def get_compiled_model_with_post(
            request: Request, model_name: str, params: QueryModelForPostCompiled, user=Depends(self.get_current_user)
        ) -> rm.CompiledQueryModel:
            started_at = time.time()
            body: dict = await request.json()
            response = await self._get_compiled_model_definition(
                model_name, user, body, params.model_dump(), headers=dict(request.headers)
            )
            self.log_activity_time("POST REQUEST for GET COMPILED MODEL", started_at, request)
            return response
145
+
@@ -0,0 +1,257 @@
1
+ """
2
+ Dataset routes for parameters and results
3
+ """
4
+ from typing import Callable, Coroutine, Any
5
+ from pydantic import Field
6
+ from fastapi import FastAPI, Depends, Request
7
+ from fastapi.responses import JSONResponse
8
+ from fastapi.security import HTTPBearer
9
+
10
+ from mcp.server.fastmcp import FastMCP, Context
11
+ from dataclasses import asdict
12
+ from cachetools import TTLCache
13
+ from textwrap import dedent
14
+
15
+ import time, json
16
+
17
+ from .. import _constants as c, _utils as u
18
+ from .._schemas import response_models as rm
19
+ from .._exceptions import ConfigurationError, InvalidInputError
20
+ from .._dataset_types import DatasetResult
21
+ from .._schemas.query_param_models import get_query_models_for_parameters, get_query_models_for_dataset
22
+ from .._schemas.auth_models import AbstractUser
23
+ from .base import RouteBase
24
+
25
+
26
class DatasetRoutes(RouteBase):
    """Dataset parameter and result routes, exposed both as HTTP endpoints and as MCP tools."""

    def __init__(self, get_bearer_token: HTTPBearer, project, no_cache: bool = False):
        """Initialise the shared route state, the dataset results cache and the AI row limit."""
        super().__init__(get_bearer_token, project, no_cache)

        # Setup caches (defaults: 128 entries, 60 minutes; TTLCache takes seconds)
        dataset_results_cache_size = int(self.env_vars.get(c.SQRL_DATASETS_CACHE_SIZE, 128))
        dataset_results_cache_ttl = int(self.env_vars.get(c.SQRL_DATASETS_CACHE_TTL_MINUTES, 60))
        self.dataset_results_cache = TTLCache(maxsize=dataset_results_cache_size, ttl=dataset_results_cache_ttl*60)

        # Setup max rows for AI (upper bound on "limit" for the MCP results tool)
        self.max_rows_for_ai = int(self.env_vars.get(c.SQRL_DATASETS_MAX_ROWS_FOR_AI, 100))

    async def _get_dataset_results_helper(
        self, dataset: str, user: AbstractUser, selections: tuple[tuple[str, Any], ...], configurables: tuple[tuple[str, str], ...]
    ) -> DatasetResult:
        """Fetch the dataset result from the project (no caching).

        ``selections`` and ``configurables`` arrive as tuples of pairs and are
        converted back to dicts before the project is called.
        """
        # Only pass configurables that are defined in manifest
        cfg_filtered = {k: v for k, v in dict(configurables).items() if k in self.manifest_cfg.configurables}
        return await self.project.dataset(dataset, user=user, selections=dict(selections), configurables=cfg_filtered)

    async def _get_dataset_results_cachable(
        self, dataset: str, user: AbstractUser, selections: tuple[tuple[str, Any], ...], configurables: tuple[tuple[str, str], ...]
    ) -> DatasetResult:
        """Same as ``_get_dataset_results_helper`` but routed through ``do_cachable_action`` with ``dataset_results_cache``."""
        return await self.do_cachable_action(self.dataset_results_cache, self._get_dataset_results_helper, dataset, user, selections, configurables)

    async def _get_dataset_results_definition(
        self, dataset_name: str, user: AbstractUser, all_request_params: dict, params: dict, headers: dict[str, str]
    ) -> rm.DatasetResultModel:
        """Validate the request, fetch the dataset result, and shape the response.

        An optional ``x_sql_query`` parameter is run on the result (exposed as
        table ``result``) before orientation, limit and offset are applied.

        Raises:
            InvalidInputError: 400 when the provided SQL fails to run on the result.
        """
        self._validate_request_params(all_request_params, params, headers)

        get_dataset_function = self._get_dataset_results_helper if self.no_cache else self._get_dataset_results_cachable
        uncached_keys = {"x_verify_params", "x_orientation", "x_sql_query", "x_limit", "x_offset"}
        selections = self.get_selections_as_immutable(params, uncached_keys)

        # Configurables from headers are honoured only for users with elevated privileges.
        user_has_elevated_privileges = u.user_has_elevated_privileges(user.access_level, self.project._elevated_access_level)
        configurables = self.get_configurables_from_headers(headers) if user_has_elevated_privileges else tuple()
        result = await get_dataset_function(dataset_name, user, selections, configurables)

        # Apply optional final SQL transformation before select/limit/offset
        sql_query = params.get("x_sql_query")
        if sql_query:
            try:
                transformed = u.run_sql_on_dataframes(sql_query, {"result": result.df.lazy()})
            except Exception as e:
                raise InvalidInputError(400, "invalid_sql_query", "Failed to run provided SQL on the dataset result") from e

            # Rebuild the "_row_num" column so it numbers the transformed rows from 1.
            transformed = transformed.drop("_row_num", strict=False).with_row_index("_row_num", offset=1)
            result = DatasetResult(target_model_config=result.target_model_config, df=transformed)

        # The "x-orientation" header takes precedence over the "x_orientation" parameter.
        orientation_header = headers.get("x-orientation")
        orientation = str(orientation_header).lower() if orientation_header is not None else params.get("x_orientation", "records")
        limit = params.get("x_limit", 1000)
        offset = params.get("x_offset", 0)
        return rm.DatasetResultModel(**result.to_json(orientation, limit, offset))

    def setup_routes(
        self, app: FastAPI, mcp: FastMCP, project_metadata_path: str, project_name: str, project_label: str,
        param_fields: dict, get_parameters_definition: Callable[..., Coroutine[Any, Any, rm.ParametersModel]]
    ) -> None:
        """Register the per-dataset parameter/result endpoints on ``app`` and the matching tools on ``mcp``.

        Raises:
            ConfigurationError: when a dataset in the manifest lists a parameter
                that is not a key of ``param_fields``.
        """

        dataset_results_path = project_metadata_path + '/dataset/{dataset}'
        dataset_parameters_path = dataset_results_path + '/parameters'

        def validate_parameters_list(parameters: list[str] | None, entity_type: str, dataset_name: str) -> None:
            """Raise ConfigurationError if any listed parameter is not a key of ``param_fields``."""
            if parameters is None:
                return
            for param in parameters:
                if param not in param_fields:
                    all_params = list(param_fields.keys())
                    raise ConfigurationError(
                        f"{entity_type} '{dataset_name}' use parameter '{param}' which doesn't exist. Available parameters are:"
                        f"\n  {all_params}"
                    )

        async def get_dataset_parameters_updates(dataset_name: str, user: AbstractUser, all_request_params: dict, params: dict, headers: dict[str, str]):
            """Resolve the dataset's parameter list and scope, then delegate to ``get_parameters_definition``.

            Raises:
                InvalidInputError: 404 when ``dataset_name`` is not a dataset in the manifest.
            """
            # The MCP tool passes a caller-supplied name, so report an unknown
            # dataset with a clear error instead of letting a bare KeyError escape.
            try:
                dataset_cfg = self.manifest_cfg.datasets[dataset_name]
            except KeyError as e:
                raise InvalidInputError(404, "dataset_not_found", f"Dataset '{dataset_name}' does not exist") from e

            result = await get_parameters_definition(
                dataset_cfg.parameters, "dataset", dataset_name, dataset_cfg.scope, user, all_request_params, params, headers=headers
            )
            return result

        # Dataset parameters and results APIs
        for dataset_name, dataset_config in self.manifest_cfg.datasets.items():
            dataset_name_for_api = u.normalize_name_for_api(dataset_name)
            curr_parameters_path = dataset_parameters_path.format(dataset=dataset_name_for_api)
            curr_results_path = dataset_results_path.format(dataset=dataset_name_for_api)

            validate_parameters_list(dataset_config.parameters, "Dataset", dataset_name)

            QueryModelForGetParams, QueryModelForPostParams = get_query_models_for_parameters(dataset_config.parameters, param_fields)
            QueryModelForGetDataset, QueryModelForPostDataset = get_query_models_for_dataset(dataset_config.parameters, param_fields)

            # NOTE(review): the handlers below read the dataset name from the request
            # path instead of using the loop variable - presumably to avoid
            # late-binding of `dataset_name` inside the closures; confirm.

            @app.get(curr_parameters_path, tags=[f"Dataset '{dataset_name}'"], description=self._parameters_description, response_class=JSONResponse)
            async def get_dataset_parameters(
                request: Request, params: QueryModelForGetParams, user=Depends(self.get_current_user) # type: ignore
            ) -> rm.ParametersModel:
                start = time.time()
                curr_dataset_name = self.get_name_from_path_section(request, -2)
                result = await get_dataset_parameters_updates(curr_dataset_name, user, dict(request.query_params), asdict(params), dict(request.headers))
                self.log_activity_time("GET REQUEST for PARAMETERS", start, request)
                return result

            @app.post(curr_parameters_path, tags=[f"Dataset '{dataset_name}'"], description=self._parameters_description, response_class=JSONResponse)
            async def get_dataset_parameters_with_post(
                request: Request, params: QueryModelForPostParams, user=Depends(self.get_current_user) # type: ignore
            ) -> rm.ParametersModel:
                start = time.time()
                curr_dataset_name = self.get_name_from_path_section(request, -2)
                payload: dict = await request.json()
                result = await get_dataset_parameters_updates(curr_dataset_name, user, payload, params.model_dump(), dict(request.headers))
                self.log_activity_time("POST REQUEST for PARAMETERS", start, request)
                return result

            @app.get(curr_results_path, tags=[f"Dataset '{dataset_name}'"], description=dataset_config.description, response_class=JSONResponse)
            async def get_dataset_results(
                request: Request, params: QueryModelForGetDataset, user=Depends(self.get_current_user) # type: ignore
            ) -> rm.DatasetResultModel:
                start = time.time()
                curr_dataset_name = self.get_name_from_path_section(request, -1)
                result = await self._get_dataset_results_definition(
                    curr_dataset_name, user, dict(request.query_params), asdict(params), headers=dict(request.headers)
                )
                self.log_activity_time("GET REQUEST for DATASET RESULTS", start, request)
                return result

            @app.post(curr_results_path, tags=[f"Dataset '{dataset_name}'"], description=dataset_config.description, response_class=JSONResponse)
            async def get_dataset_results_with_post(
                request: Request, params: QueryModelForPostDataset, user=Depends(self.get_current_user) # type: ignore
            ) -> rm.DatasetResultModel:
                start = time.time()
                curr_dataset_name = self.get_name_from_path_section(request, -1)
                payload: dict = await request.json()
                result = await self._get_dataset_results_definition(
                    curr_dataset_name, user, payload, params.model_dump(), headers=dict(request.headers)
                )
                self.log_activity_time("POST REQUEST for DATASET RESULTS", start, request)
                return result

        # Setup MCP tools

        @mcp.tool(
            name=f"get_dataset_parameters_from_{project_name}",
            title=f"Get Dataset Parameters Updates (Project: {project_label})",
            description=dedent(f"""
                Use this tool to get updates for dataset parameters in the Squirrels project "{project_name}" when a selection is to be made on a parameter with "trigger_refresh" as true.

                For example, suppose there are two parameters, "country" and "city", and the user selects "United States" for "country". If "country" has the "trigger_refresh" field as true, then this tool should be called to get the updates for other parameters such as "city".

                Do not use this tool on parameters whose "trigger_refresh" field is false!
            """).strip()
        )
        async def get_dataset_parameters_tool(
            ctx: Context,
            dataset: str = Field(description="The name of the dataset whose parameters the trigger parameter will update"),
            parameter_name: str = Field(description="The name of the parameter triggering the refresh"),
            selected_ids: list[str] = Field(description="The ID(s) of the selected option(s) for the parameter"),
        ) -> rm.ParametersModel:
            headers = self.get_headers_from_tool_ctx(ctx)
            user = self.get_user_from_tool_headers(headers)
            dataset_name = u.normalize_name(dataset)
            # The same dict serves as both the raw request params and the parsed params.
            payload = {
                "x_parent_param": parameter_name,
                parameter_name: selected_ids
            }
            return await get_dataset_parameters_updates(dataset_name, user, payload, payload, headers)

        @mcp.tool(
            name=f"get_dataset_results_from_{project_name}",
            title=f"Get Dataset Results (Project: {project_label})",
            description=dedent(f"""
                Use this tool to get the dataset results as a JSON object for a dataset in the Squirrels project "{project_name}".
                - Use the "offset" and "limit" arguments to limit the number of rows you require
                - The "limit" argument controls the number of rows returned. The maximum allowed value is {self.max_rows_for_ai}. If the 'total_num_rows' field in the response is greater than {self.max_rows_for_ai}, let the user know that only {self.max_rows_for_ai} rows are shown and clarify if they would like to see more.
            """).strip()
        )
        async def get_dataset_results_tool(
            ctx: Context,
            dataset: str = Field(description="The name of the dataset to get results for"),
            parameters: str = Field(description=dedent("""
                A JSON object (as string) containing key-value pairs for parameter name and selected value. The selected value to provide depends on the parameter widget type:
                - For single select, use a string for the ID of the selected value
                - For multi select, use an array of strings for the IDs of the selected values
                - For date, use a string like "YYYY-MM-DD"
                - For date ranges, use array of strings like ["YYYY-MM-DD","YYYY-MM-DD"]
                - For number, use a number like 1
                - For number ranges, use array of numbers like [1,100]
                - For text, use a string for the text value
                - Complex objects are NOT supported
            """).strip()),
            sql_query: str | None = Field(None, description=dedent("""
                A custom DuckDB SQL query to execute on the final dataset result.
                - Use table name 'result' to reference the dataset result.
                - Use this to apply transformations to the dataset result if needed (such as filtering, sorting, or selecting columns).
                - If not provided, the dataset result is returned as is.
            """).strip()),
            offset: int = Field(0, description="The number of rows to skip from first row. Applied after final SQL. Default is 0."),
            limit: int = Field(self.max_rows_for_ai, description=f"The maximum number of rows to return. Applied after final SQL. Default is {self.max_rows_for_ai}. Maximum allowed value is {self.max_rows_for_ai}."),
        ) -> rm.DatasetResultModel:
            # Enforce the AI row cap before doing any other work.
            if limit > self.max_rows_for_ai:
                raise ValueError(f"The maximum number of rows to return is {self.max_rows_for_ai}.")

            headers = self.get_headers_from_tool_ctx(ctx)
            user = self.get_user_from_tool_headers(headers)
            dataset_name = u.normalize_name(dataset)

            try:
                params = json.loads(parameters)
            except json.JSONDecodeError:
                params = None # error handled below

            # Valid JSON that is not an object (list, number, string, null) is rejected too.
            if not isinstance(params, dict):
                raise InvalidInputError(400, "invalid_parameters", "The 'parameters' argument must be a JSON object.")

            params.update({
                "x_sql_query": sql_query,
                "x_offset": offset,
                "x_limit": limit
            })

            # Set default orientation as rows if not provided
            headers.setdefault("x-orientation", "rows")

            result = await self._get_dataset_results_definition(dataset_name, user, params, params, headers)
            return result
257
+