squirrels 0.1.0__py3-none-any.whl → 0.6.0.post0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127) hide show
  1. dateutils/__init__.py +6 -0
  2. dateutils/_enums.py +25 -0
  3. squirrels/dateutils.py → dateutils/_implementation.py +409 -380
  4. dateutils/types.py +6 -0
  5. squirrels/__init__.py +21 -18
  6. squirrels/_api_routes/__init__.py +5 -0
  7. squirrels/_api_routes/auth.py +337 -0
  8. squirrels/_api_routes/base.py +196 -0
  9. squirrels/_api_routes/dashboards.py +156 -0
  10. squirrels/_api_routes/data_management.py +148 -0
  11. squirrels/_api_routes/datasets.py +220 -0
  12. squirrels/_api_routes/project.py +289 -0
  13. squirrels/_api_server.py +552 -134
  14. squirrels/_arguments/__init__.py +0 -0
  15. squirrels/_arguments/init_time_args.py +83 -0
  16. squirrels/_arguments/run_time_args.py +111 -0
  17. squirrels/_auth.py +777 -0
  18. squirrels/_command_line.py +239 -107
  19. squirrels/_compile_prompts.py +147 -0
  20. squirrels/_connection_set.py +94 -0
  21. squirrels/_constants.py +141 -64
  22. squirrels/_dashboards.py +179 -0
  23. squirrels/_data_sources.py +570 -0
  24. squirrels/_dataset_types.py +91 -0
  25. squirrels/_env_vars.py +209 -0
  26. squirrels/_exceptions.py +29 -0
  27. squirrels/_http_error_responses.py +52 -0
  28. squirrels/_initializer.py +319 -110
  29. squirrels/_logging.py +121 -0
  30. squirrels/_manifest.py +357 -187
  31. squirrels/_mcp_server.py +578 -0
  32. squirrels/_model_builder.py +69 -0
  33. squirrels/_model_configs.py +74 -0
  34. squirrels/_model_queries.py +52 -0
  35. squirrels/_models.py +1201 -0
  36. squirrels/_package_data/base_project/.env +7 -0
  37. squirrels/_package_data/base_project/.env.example +44 -0
  38. squirrels/_package_data/base_project/connections.yml +16 -0
  39. squirrels/_package_data/base_project/dashboards/dashboard_example.py +40 -0
  40. squirrels/_package_data/base_project/dashboards/dashboard_example.yml +22 -0
  41. squirrels/_package_data/base_project/docker/.dockerignore +16 -0
  42. squirrels/_package_data/base_project/docker/Dockerfile +16 -0
  43. squirrels/_package_data/base_project/docker/compose.yml +7 -0
  44. squirrels/_package_data/base_project/duckdb_init.sql +10 -0
  45. squirrels/_package_data/base_project/gitignore +13 -0
  46. squirrels/_package_data/base_project/macros/macros_example.sql +17 -0
  47. squirrels/_package_data/base_project/models/builds/build_example.py +26 -0
  48. squirrels/_package_data/base_project/models/builds/build_example.sql +16 -0
  49. squirrels/_package_data/base_project/models/builds/build_example.yml +57 -0
  50. squirrels/_package_data/base_project/models/dbviews/dbview_example.sql +17 -0
  51. squirrels/_package_data/base_project/models/dbviews/dbview_example.yml +32 -0
  52. squirrels/_package_data/base_project/models/federates/federate_example.py +51 -0
  53. squirrels/_package_data/base_project/models/federates/federate_example.sql +21 -0
  54. squirrels/_package_data/base_project/models/federates/federate_example.yml +65 -0
  55. squirrels/_package_data/base_project/models/sources.yml +38 -0
  56. squirrels/_package_data/base_project/parameters.yml +142 -0
  57. squirrels/_package_data/base_project/pyconfigs/connections.py +19 -0
  58. squirrels/_package_data/base_project/pyconfigs/context.py +96 -0
  59. squirrels/_package_data/base_project/pyconfigs/parameters.py +141 -0
  60. squirrels/_package_data/base_project/pyconfigs/user.py +56 -0
  61. squirrels/_package_data/base_project/resources/expenses.db +0 -0
  62. squirrels/_package_data/base_project/resources/public/.gitkeep +0 -0
  63. squirrels/_package_data/base_project/resources/weather.db +0 -0
  64. squirrels/_package_data/base_project/seeds/seed_categories.csv +6 -0
  65. squirrels/_package_data/base_project/seeds/seed_categories.yml +15 -0
  66. squirrels/_package_data/base_project/seeds/seed_subcategories.csv +15 -0
  67. squirrels/_package_data/base_project/seeds/seed_subcategories.yml +21 -0
  68. squirrels/_package_data/base_project/squirrels.yml.j2 +61 -0
  69. squirrels/_package_data/base_project/tmp/.gitignore +2 -0
  70. squirrels/_package_data/templates/login_successful.html +53 -0
  71. squirrels/_package_data/templates/squirrels_studio.html +22 -0
  72. squirrels/_package_loader.py +29 -0
  73. squirrels/_parameter_configs.py +592 -0
  74. squirrels/_parameter_options.py +348 -0
  75. squirrels/_parameter_sets.py +207 -0
  76. squirrels/_parameters.py +1703 -0
  77. squirrels/_project.py +796 -0
  78. squirrels/_py_module.py +122 -0
  79. squirrels/_request_context.py +33 -0
  80. squirrels/_schemas/__init__.py +0 -0
  81. squirrels/_schemas/auth_models.py +83 -0
  82. squirrels/_schemas/query_param_models.py +70 -0
  83. squirrels/_schemas/request_models.py +26 -0
  84. squirrels/_schemas/response_models.py +286 -0
  85. squirrels/_seeds.py +97 -0
  86. squirrels/_sources.py +112 -0
  87. squirrels/_utils.py +540 -149
  88. squirrels/_version.py +1 -3
  89. squirrels/arguments.py +7 -0
  90. squirrels/auth.py +4 -0
  91. squirrels/connections.py +3 -0
  92. squirrels/dashboards.py +3 -0
  93. squirrels/data_sources.py +14 -282
  94. squirrels/parameter_options.py +13 -189
  95. squirrels/parameters.py +14 -801
  96. squirrels/types.py +18 -0
  97. squirrels-0.6.0.post0.dist-info/METADATA +148 -0
  98. squirrels-0.6.0.post0.dist-info/RECORD +101 -0
  99. {squirrels-0.1.0.dist-info → squirrels-0.6.0.post0.dist-info}/WHEEL +1 -2
  100. {squirrels-0.1.0.dist-info → squirrels-0.6.0.post0.dist-info}/entry_points.txt +1 -0
  101. squirrels-0.6.0.post0.dist-info/licenses/LICENSE +201 -0
  102. squirrels/_credentials_manager.py +0 -87
  103. squirrels/_module_loader.py +0 -37
  104. squirrels/_parameter_set.py +0 -151
  105. squirrels/_renderer.py +0 -286
  106. squirrels/_timed_imports.py +0 -37
  107. squirrels/connection_set.py +0 -126
  108. squirrels/package_data/base_project/.gitignore +0 -4
  109. squirrels/package_data/base_project/connections.py +0 -21
  110. squirrels/package_data/base_project/database/sample_database.db +0 -0
  111. squirrels/package_data/base_project/database/seattle_weather.db +0 -0
  112. squirrels/package_data/base_project/datasets/sample_dataset/context.py +0 -8
  113. squirrels/package_data/base_project/datasets/sample_dataset/database_view1.py +0 -23
  114. squirrels/package_data/base_project/datasets/sample_dataset/database_view1.sql.j2 +0 -7
  115. squirrels/package_data/base_project/datasets/sample_dataset/final_view.py +0 -10
  116. squirrels/package_data/base_project/datasets/sample_dataset/final_view.sql.j2 +0 -2
  117. squirrels/package_data/base_project/datasets/sample_dataset/parameters.py +0 -30
  118. squirrels/package_data/base_project/datasets/sample_dataset/selections.cfg +0 -6
  119. squirrels/package_data/base_project/squirrels.yaml +0 -26
  120. squirrels/package_data/static/favicon.ico +0 -0
  121. squirrels/package_data/static/script.js +0 -234
  122. squirrels/package_data/static/style.css +0 -110
  123. squirrels/package_data/templates/index.html +0 -32
  124. squirrels-0.1.0.dist-info/LICENSE +0 -22
  125. squirrels-0.1.0.dist-info/METADATA +0 -67
  126. squirrels-0.1.0.dist-info/RECORD +0 -40
  127. squirrels-0.1.0.dist-info/top_level.txt +0 -1
squirrels/_manifest.py CHANGED
@@ -1,187 +1,357 @@
1
- from typing import List, Dict, Any, Optional, Union
2
- from pathlib import Path
3
- from sqlalchemy import Engine, create_engine
4
- import yaml
5
-
6
- from squirrels import _constants as c, _utils
7
- from squirrels._credentials_manager import Credential, squirrels_config_io
8
- from squirrels._utils import ConfigurationError, InvalidInputError
9
-
10
-
11
- class Manifest:
12
- def __init__(self, parms: Dict) -> None:
13
- self._parms = parms
14
-
15
- @classmethod
16
- def from_yaml_str(cls, parms_str: str):
17
- parms = yaml.safe_load(parms_str)
18
- return cls(parms)
19
-
20
- @classmethod
21
- def from_file(cls, manifest_path: str):
22
- with open(manifest_path, 'r') as f:
23
- parms_str = f.read()
24
-
25
- return Manifest.from_yaml_str(parms_str)
26
-
27
- def get_proj_vars(self) -> Dict[str, Any]:
28
- return self._parms.get(c.PROJ_VARS_KEY, dict())
29
-
30
- def get_modules(self) -> List[str]:
31
- return self._parms.get(c.MODULES_KEY, list())
32
-
33
- def _get_required_field(self, key: str) -> Any:
34
- try:
35
- return self._parms[key]
36
- except KeyError as e:
37
- raise ConfigurationError(f'Field "{key}" not found in squirrels.yaml') from e
38
-
39
- def get_base_path(self) -> str:
40
- project_vars = self.get_proj_vars()
41
- try:
42
- product = project_vars[c.PRODUCT_KEY]
43
- major_version = project_vars[c.MAJOR_VERSION_KEY]
44
- except KeyError as e:
45
- raise ConfigurationError("Could not construct API endpoint as 'product' and 'major_version'" +
46
- "were not specified in project variables") from e
47
- base_path = f"/{product}/v{major_version}"
48
- return base_path
49
-
50
- def get_db_connections(self, test_creds: Dict[str, Credential] = None) -> Dict[str, Engine]:
51
- configs: Dict[str, Dict[str, str]] = self._parms.get(c.DB_CONNECTIONS_KEY, {})
52
- output = {}
53
- for key, config in configs.items():
54
- cred_key = config.get(c.DB_CREDENTIALS_KEY)
55
- if cred_key is None:
56
- cred = Credential("", "")
57
- elif test_creds is not None:
58
- cred = test_creds[cred_key]
59
- else:
60
- cred = squirrels_config_io.get_credential(cred_key)
61
- url = config[c.URL_KEY].replace("${username}", cred.username).replace("${password}", cred.password)
62
- output[key] = create_engine(url)
63
- return output
64
-
65
- def _get_dataset_parms(self, dataset: str) -> Dict[str, Any]:
66
- try:
67
- return self._get_required_field(c.DATASETS_KEY)[dataset]
68
- except KeyError as e:
69
- raise InvalidInputError(f'No such dataset named "{dataset}" exists') from e
70
-
71
- def _get_required_field_from_dataset_parms(self, dataset: str, key: str):
72
- try:
73
- return self._get_dataset_parms(dataset)[key]
74
- except KeyError as e:
75
- raise ConfigurationError(f'The "{key}" field is not defined for dataset "{dataset}"') from e
76
-
77
- def _get_all_database_view_parms(self, dataset: str) -> Dict[str, Dict[str, str]]:
78
- return self._get_required_field_from_dataset_parms(dataset, c.DATABASE_VIEWS_KEY)
79
-
80
- def get_all_dataset_names(self) -> str:
81
- datasets: Dict[str, Any] = self._get_required_field(c.DATASETS_KEY)
82
- return list(datasets.keys())
83
-
84
- def get_dataset_folder(self, dataset: str) -> Path:
85
- return _utils.join_paths(c.DATASETS_FOLDER, dataset)
86
-
87
- def get_dataset_args(self, dataset: str) -> Dict[str, Any]:
88
- dataset_args = self._get_dataset_parms(dataset).get("args", {})
89
- full_args = {**self.get_proj_vars(), **dataset_args}
90
- return full_args
91
-
92
- def get_all_database_view_names(self, dataset: str) -> List[str]:
93
- all_database_views = self._get_all_database_view_parms(dataset)
94
- return list(all_database_views.keys())
95
-
96
- def get_database_view_file(self, dataset: str, database_view: str) -> Path:
97
- database_view_parms = self._get_all_database_view_parms(dataset)[database_view]
98
- if isinstance(database_view_parms, str):
99
- db_view_file = database_view_parms
100
- else:
101
- try:
102
- db_view_file = database_view_parms[c.FILE_KEY]
103
- except KeyError as e:
104
- raise ConfigurationError(f'The "{c.FILE_KEY}" field is not defined for "{database_view}" in dataset "{dataset}"') from e
105
- dataset_folder = self.get_dataset_folder(dataset)
106
- return _utils.join_paths(dataset_folder, db_view_file)
107
-
108
- def get_view_args(self, dataset: str, database_view: str = None) -> Dict[str, Any]:
109
- dataset_args = self.get_dataset_args(dataset)
110
- if database_view is None:
111
- view_parms: Dict[str, Any] = self._get_required_field_from_dataset_parms(dataset, c.FINAL_VIEW_KEY)
112
- else:
113
- view_parms: Dict[str, Any] = self._get_all_database_view_parms(dataset)[database_view]
114
- view_args: Dict[str, Any] = {} if isinstance(view_parms, str) else view_parms.get("args", {})
115
- full_args = {**dataset_args, **view_args}
116
- return full_args
117
-
118
- def get_database_view_db_connection(self, dataset: str, database_view: str) -> Optional[str]:
119
- database_view_parms = self._get_all_database_view_parms(dataset)[database_view]
120
- if isinstance(database_view_parms, str):
121
- db_connection = c.DEFAULT_DB_CONN
122
- else:
123
- db_connection = database_view_parms.get(c.DB_CONNECTION_KEY, c.DEFAULT_DB_CONN)
124
- return db_connection
125
-
126
- def get_dataset_label(self, dataset: str) -> str:
127
- return self._get_required_field_from_dataset_parms(dataset, c.DATASET_LABEL_KEY)
128
-
129
- def get_dataset_final_view_file(self, dataset: str) -> Union[str, Path]:
130
- final_view_parms: Dict[str, Any] = self._get_required_field_from_dataset_parms(dataset, c.FINAL_VIEW_KEY)
131
- if isinstance(final_view_parms, str):
132
- final_view_file = final_view_parms
133
- else:
134
- try:
135
- final_view_file = final_view_parms[c.FILE_KEY]
136
- except KeyError as e:
137
- raise ConfigurationError(f'The "{c.FILE_KEY}" field is not defined for the final view') from e
138
-
139
- database_views = self.get_all_database_view_names(dataset)
140
- if final_view_file in database_views:
141
- return final_view_file
142
- else:
143
- dataset_path = self.get_dataset_folder(dataset)
144
- return _utils.join_paths(dataset_path, final_view_file)
145
-
146
- def get_setting(self, key: str, default: Any) -> Any:
147
- settings: Dict[str, Any] = self._parms.get(c.SETTINGS_KEY, dict())
148
- return settings.get(key, default)
149
-
150
- def get_catalog(self, parameters_path: str, results_path: str) -> Any:
151
- """
152
- Gets the component of the catalog API response that's generated by this manifest
153
-
154
- Parameters:
155
- parameters_path: The path to the parameters API endpoint
156
- results_path: The path to the results API endpoint
157
-
158
- Returns:
159
- A JSON response for the catalog API
160
- """
161
- datasets_info = []
162
- for dataset in self.get_all_dataset_names():
163
- dataset_normalized = _utils.normalize_name_for_api(dataset)
164
- datasets_info.append({
165
- 'name': dataset,
166
- 'label': self.get_dataset_label(dataset),
167
- 'parameters_path': parameters_path.format(dataset=dataset_normalized),
168
- 'result_path': results_path.format(dataset=dataset_normalized),
169
- 'first_minor_version': 0
170
- })
171
-
172
- project_vars = self.get_proj_vars()
173
- return {
174
- 'response_version': 0,
175
- 'products': [{
176
- 'name': project_vars[c.PRODUCT_KEY],
177
- 'versions': [{
178
- 'major_version': project_vars[c.MAJOR_VERSION_KEY],
179
- 'latest_minor_version': project_vars[c.MINOR_VERSION_KEY],
180
- 'datasets': datasets_info
181
- }]
182
- }]
183
- }
184
-
185
-
186
- def _from_file():
187
- return Manifest.from_file(c.MANIFEST_FILE)
1
+ from functools import cached_property
2
+ from typing import Literal, Any
3
+ from urllib.parse import urlparse
4
+ from sqlalchemy import Engine, create_engine
5
+ from typing_extensions import Self
6
+ from enum import Enum
7
+ from pydantic import BaseModel, Field, field_validator, model_validator, ValidationInfo, ValidationError
8
+ import yaml, time, re
9
+
10
+ from . import _constants as c, _utils as u
11
+
12
+
13
+ class _ConfigWithNameBaseModel(BaseModel):
14
+ name: str
15
+
16
+ @field_validator("name")
17
+ @classmethod
18
+ def validate_name(cls, v: str) -> str:
19
+ if not re.fullmatch(r"[A-Za-z0-9_-]+", v):
20
+ raise ValueError("Name must only contain alphanumeric characters, underscores, and dashes.")
21
+ return v
22
+
23
+
24
+ class AuthType(Enum):
25
+ REQUIRED = "required"
26
+ OPTIONAL = "optional"
27
+ NOTSET = "notset"
28
+
29
+
30
+ class AuthStrategy(Enum):
31
+ MANAGED = "managed"
32
+ EXTERNAL = "external"
33
+
34
+
35
+ class ProjectVarsConfig(_ConfigWithNameBaseModel, extra="allow"):
36
+ major_version: int
37
+ label: str = ""
38
+ description: str = ""
39
+ auth_type: AuthType = AuthType.NOTSET
40
+ auth_strategy: AuthStrategy = AuthStrategy.MANAGED
41
+
42
+ @model_validator(mode="after")
43
+ def set_auth_strategy_defaults(self) -> Self:
44
+ if self.auth_strategy == AuthStrategy.EXTERNAL and self.auth_type == AuthType.OPTIONAL:
45
+ raise ValueError("auth_type can not be optional when auth_strategy is external")
46
+
47
+ if self.auth_type == AuthType.NOTSET:
48
+ self.auth_type = AuthType.REQUIRED if self.auth_strategy == AuthStrategy.EXTERNAL else AuthType.OPTIONAL
49
+
50
+ return self
51
+
52
+ @model_validator(mode="after")
53
+ def finalize_label(self) -> Self:
54
+ if self.label == "":
55
+ self.label = u.to_title_case(self.name)
56
+ return self
57
+
58
+
59
+ class PackageConfig(BaseModel):
60
+ git: str
61
+ revision: str
62
+ directory: str = ""
63
+
64
+ @model_validator(mode="after")
65
+ def finalize_directory(self) -> Self:
66
+ if self.directory == "":
67
+ self.directory = self.git.split('/')[-1].removesuffix('.git')
68
+ return self
69
+
70
+
71
+ class ConnectionTypeEnum(Enum):
72
+ SQLALCHEMY = "sqlalchemy"
73
+ CONNECTORX = "connectorx"
74
+ ADBC = "adbc"
75
+ DUCKDB = "duckdb"
76
+
77
+
78
+ class ConnectionProperties(BaseModel):
79
+ """
80
+ A class for holding the properties of a connection
81
+
82
+ Arguments:
83
+ type: The type of connection, one of "sqlalchemy", "connectorx", "adbc", or "duckdb"
84
+ uri: The URI for the connection
85
+ """
86
+ label: str | None = None
87
+ type: ConnectionTypeEnum = Field(default=ConnectionTypeEnum.SQLALCHEMY)
88
+ uri: str
89
+ sa_create_engine_args: dict[str, Any] = Field(default_factory=dict)
90
+
91
+ @cached_property
92
+ def engine(self) -> Engine:
93
+ """
94
+ Creates and caches a SQLAlchemy engine if the connection type is sqlalchemy.
95
+ Raises ValueError for other connection types.
96
+ """
97
+ if self.type == ConnectionTypeEnum.SQLALCHEMY:
98
+ return create_engine(self.uri, **self.sa_create_engine_args)
99
+ else:
100
+ raise ValueError(f'Connection type "{self.type}" does not support engine property')
101
+
102
+ @cached_property
103
+ def dialect(self) -> str:
104
+ default_dialect = None
105
+ if self.type == ConnectionTypeEnum.SQLALCHEMY:
106
+ dialect = self.engine.dialect.name
107
+ elif self.type == ConnectionTypeEnum.DUCKDB:
108
+ dialect = self.uri.split(':')[0]
109
+ default_dialect = 'duckdb'
110
+ else:
111
+ url = urlparse(self.uri)
112
+ dialect = url.scheme
113
+
114
+ processed_dialect = next((d for d in ['sqlite', 'postgres', 'mysql', 'duckdb'] if dialect.lower().startswith(d)), default_dialect)
115
+ dialect = processed_dialect if processed_dialect is not None else dialect
116
+ return dialect
117
+
118
+ @cached_property
119
+ def attach_uri_for_duckdb(self) -> str | None:
120
+ if self.type == ConnectionTypeEnum.DUCKDB:
121
+ return self.uri
122
+ elif self.type == ConnectionTypeEnum.SQLALCHEMY:
123
+ url = self.engine.url
124
+ host = url.host
125
+ port = url.port
126
+ username = url.username
127
+ password = url.password
128
+ database = url.database
129
+ database_as_file = database if database is not None else ""
130
+ else:
131
+ url = urlparse(self.uri)
132
+ host = url.hostname
133
+ port = url.port
134
+ username = url.username
135
+ password = url.password
136
+ database = url.path.lstrip('/')
137
+ database_as_file = self.uri.replace(f"{self.dialect}://", "")
138
+
139
+ if self.dialect in ('postgres', 'mysql'):
140
+ attach_uri = f"{self.dialect}:dbname={database} user={username} password={password} host={host} port={port}"
141
+ elif self.dialect == "sqlite":
142
+ attach_uri = f"{self.dialect}:{database_as_file}"
143
+ elif self.dialect == "duckdb":
144
+ attach_uri = database_as_file
145
+ else:
146
+ attach_uri = None
147
+
148
+ return attach_uri
149
+
150
+
151
+ class DbConnConfig(ConnectionProperties, _ConfigWithNameBaseModel):
152
+ def finalize_uri(self, project_path: str) -> Self:
153
+ self.uri = self.uri.format(project_path=project_path)
154
+ return self
155
+
156
+
157
+ class ConfigurableOverride(BaseModel):
158
+ name: str
159
+ default: str
160
+
161
+
162
+ class ConfigurablesConfig(ConfigurableOverride):
163
+ label: str = ""
164
+ description: str = ""
165
+
166
+
167
+ class ParametersConfig(BaseModel):
168
+ type: str
169
+ factory: str
170
+ arguments: dict[str, Any]
171
+
172
+
173
+ class PermissionScope(Enum):
174
+ PUBLIC = 0
175
+ PROTECTED = 1
176
+ PRIVATE = 2
177
+
178
+
179
+ class AnalyticsOutputConfig(_ConfigWithNameBaseModel):
180
+ label: str = ""
181
+ description: str = ""
182
+ scope: PermissionScope | None = None
183
+ parameters: list[str] | None = Field(default=None, description="The list of parameter names used by the dataset/dashboard")
184
+ configurables: list[ConfigurableOverride] = Field(default_factory=list)
185
+ project_configurables: dict[str, Any] | None = Field(default=None, exclude=True)
186
+
187
+ @model_validator(mode="after")
188
+ def finalize_label(self) -> Self:
189
+ if self.label == "":
190
+ self.label = self.name
191
+ return self
192
+
193
+ @field_validator("scope", mode="before")
194
+ @classmethod
195
+ def validate_scope(cls, value: Any, info: ValidationInfo) -> PermissionScope | None:
196
+ if value is None:
197
+ return None
198
+ try:
199
+ return PermissionScope[str(value).upper()]
200
+ except KeyError as e:
201
+ name = info.data.get("name")
202
+ scope_list = [scope.name.lower() for scope in PermissionScope]
203
+ raise ValueError(f'Scope "{value}" is invalid for dataset/dashboard "{name}". Scope must be one of {scope_list}') from e
204
+
205
+ @model_validator(mode="after")
206
+ def validate_configurables(self) -> Self:
207
+ if self.project_configurables is not None:
208
+ for cfg_override in self.configurables:
209
+ if cfg_override.name not in self.project_configurables:
210
+ # Determine if it's a dataset or dashboard for better error message
211
+ class_name = self.__class__.__name__
212
+ type_str = "Dataset" if "Dataset" in class_name else "Dashboard" if "Dashboard" in class_name else "Asset"
213
+ raise ValueError(
214
+ f'{type_str} "{self.name}" references configurable "{cfg_override.name}" which is not defined '
215
+ f'in the project configurables'
216
+ )
217
+ return self
218
+
219
+
220
+ class DatasetConfig(AnalyticsOutputConfig):
221
+ model: str = ""
222
+
223
+ def __hash__(self) -> int:
224
+ return hash("dataset_"+self.name)
225
+
226
+ @model_validator(mode="after")
227
+ def finalize_model(self) -> Self:
228
+ if self.model == "":
229
+ self.model = self.name
230
+ return self
231
+
232
+
233
+ class TestSetsUserConfig(BaseModel):
234
+ access_level: Literal["admin", "member", "guest"] = "guest"
235
+ custom_fields: dict[str, Any] = Field(default_factory=dict)
236
+
237
+ class TestSetsConfig(_ConfigWithNameBaseModel):
238
+ user: TestSetsUserConfig = Field(default_factory=TestSetsUserConfig)
239
+ parameters: dict[str, Any] = Field(default_factory=dict)
240
+ configurables: dict[str, Any] = Field(default_factory=dict)
241
+
242
+
243
+ class ManifestConfig(BaseModel):
244
+ project_variables: ProjectVarsConfig
245
+ packages: list[PackageConfig] = Field(default_factory=list)
246
+ connections: dict[str, DbConnConfig] = Field(default_factory=dict)
247
+ parameters: list[ParametersConfig] = Field(default_factory=list)
248
+ configurables: dict[str, ConfigurablesConfig] = Field(default_factory=dict)
249
+ selection_test_sets: dict[str, TestSetsConfig] = Field(default_factory=dict)
250
+ datasets: dict[str, DatasetConfig] = Field(default_factory=dict)
251
+ project_path: str = "."
252
+
253
+ @field_validator("packages")
254
+ @classmethod
255
+ def package_directories_are_unique(cls, packages: list[PackageConfig]) -> list[PackageConfig]:
256
+ set_of_directories = set()
257
+ for package in packages:
258
+ if package.directory in set_of_directories:
259
+ raise ValueError(f'In the packages section, multiple target directories found for "{package.directory}"')
260
+ set_of_directories.add(package.directory)
261
+ return packages
262
+
263
+ @field_validator("connections", "selection_test_sets", "datasets", "configurables", mode="before")
264
+ @classmethod
265
+ def names_are_unique(cls, values: list[dict] | dict[str, dict], info: ValidationInfo) -> dict[str, dict]:
266
+ if isinstance(values, list):
267
+ values_as_dict = {}
268
+ for obj in values:
269
+ name = u.normalize_name(obj["name"])
270
+ if name in values_as_dict:
271
+ raise ValueError(f'In the {info.field_name} section, the name "{name}" was specified multiple times')
272
+ values_as_dict[name] = obj
273
+ else:
274
+ values_as_dict = values
275
+ return values_as_dict
276
+
277
+ @model_validator(mode="after")
278
+ def finalize_connections(self) -> Self:
279
+ for conn in self.connections.values():
280
+ conn.finalize_uri(self.project_path)
281
+ return self
282
+
283
+ @model_validator(mode="after")
284
+ def validate_authentication_and_scopes(self) -> Self:
285
+ """
286
+ Enforce authentication rules:
287
+ - Set default scope based on auth_type if not specified.
288
+ - If auth_type is REQUIRED, no dataset may be PUBLIC.
289
+ """
290
+ is_auth_required = self.project_variables.auth_type == AuthType.REQUIRED
291
+ default_scope = PermissionScope.PROTECTED if is_auth_required else PermissionScope.PUBLIC
292
+
293
+ for ds in self.datasets.values():
294
+ if ds.scope is None:
295
+ ds.scope = default_scope
296
+
297
+ if is_auth_required:
298
+ invalid = [name for name, ds in self.datasets.items() if ds.scope == PermissionScope.PUBLIC]
299
+ if invalid:
300
+ raise ValueError(
301
+ "Authentication is required, so datasets cannot be public.\n "
302
+ f"Update the scope for the following datasets: {invalid}\n "
303
+ )
304
+ return self
305
+
306
+ @model_validator(mode="after")
307
+ def validate_dataset_configurables(self) -> Self:
308
+ """
309
+ Validate that dataset configurables reference valid project-level configurables.
310
+ """
311
+ for dataset_cfg in self.datasets.values():
312
+ dataset_cfg.project_configurables = self.configurables
313
+ dataset_cfg.validate_configurables()
314
+ return self
315
+
316
+ def get_default_test_set(self) -> TestSetsConfig:
317
+ """
318
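+ Returns the selection test set registered under the default test set name, or a new test set with that name and default values if none is configured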
319
+ """
320
+ default_default_test_set = TestSetsConfig(name=c.DEFAULT_TEST_SET_NAME)
321
+ default_test_set = self.selection_test_sets.get(c.DEFAULT_TEST_SET_NAME, default_default_test_set)
322
+ return default_test_set
323
+
324
+ def get_default_configurables(self, *, overrides: list[ConfigurableOverride] = []) -> dict[str, str]:
325
+ """
326
+ Return a dictionary of configurable name to its default value.
327
+
328
+ Arguments:
329
+ overrides: A list of ConfigurableOverride objects to merge with the project-level defaults.
330
+ """
331
+ defaults: dict[str, str] = {}
332
+ for name, cfg in self.configurables.items():
333
+ defaults[name] = str(cfg.default)
334
+
335
+ # Apply explicit overrides if provided
336
+ for cfg_override in overrides:
337
+ defaults[cfg_override.name] = cfg_override.default
338
+
339
+ return defaults
340
+
341
+
342
+ class ManifestIO:
343
+ @classmethod
344
+ def load_from_file(cls, logger: u.Logger, project_path: str, env_vars_unformatted: dict[str, str]) -> ManifestConfig:
345
+ start = time.time()
346
+
347
+ raw_content = u.read_file(u.Path(project_path, c.MANIFEST_FILE))
348
+ content = u.render_string(raw_content, project_path=project_path, env_vars=env_vars_unformatted)
349
+ manifest_content: dict[str, Any] = yaml.safe_load(content)
350
+
351
+ try:
352
+ manifest_cfg = ManifestConfig(project_path=project_path, **manifest_content)
353
+ except ValidationError as e:
354
+ raise u.ConfigurationError(f"Failed to process {c.MANIFEST_FILE} file. " + str(e)) from e
355
+
356
+ logger.log_activity_time(f"loading {c.MANIFEST_FILE} file", start)
357
+ return manifest_cfg