squirrels-0.3.2-py3-none-any.whl → squirrels-0.4.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of squirrels might be problematic. Click here for more details.

Files changed (56)
  1. squirrels/__init__.py +7 -3
  2. squirrels/_api_response_models.py +96 -72
  3. squirrels/_api_server.py +375 -201
  4. squirrels/_authenticator.py +23 -22
  5. squirrels/_command_line.py +70 -46
  6. squirrels/_connection_set.py +23 -25
  7. squirrels/_constants.py +29 -78
  8. squirrels/_dashboards_io.py +61 -0
  9. squirrels/_environcfg.py +53 -50
  10. squirrels/_initializer.py +184 -141
  11. squirrels/_manifest.py +168 -195
  12. squirrels/_models.py +159 -292
  13. squirrels/_package_loader.py +7 -8
  14. squirrels/_parameter_configs.py +173 -141
  15. squirrels/_parameter_sets.py +49 -38
  16. squirrels/_py_module.py +7 -7
  17. squirrels/_seeds.py +13 -12
  18. squirrels/_utils.py +114 -54
  19. squirrels/_version.py +1 -1
  20. squirrels/arguments/init_time_args.py +16 -10
  21. squirrels/arguments/run_time_args.py +89 -24
  22. squirrels/dashboards.py +82 -0
  23. squirrels/data_sources.py +212 -232
  24. squirrels/dateutils.py +29 -26
  25. squirrels/package_data/assets/index.css +1 -1
  26. squirrels/package_data/assets/index.js +27 -18
  27. squirrels/package_data/base_project/.gitignore +2 -2
  28. squirrels/package_data/base_project/connections.yml +1 -1
  29. squirrels/package_data/base_project/dashboards/dashboard_example.py +32 -0
  30. squirrels/package_data/base_project/dashboards.yml +10 -0
  31. squirrels/package_data/base_project/docker/.dockerignore +9 -4
  32. squirrels/package_data/base_project/docker/Dockerfile +7 -6
  33. squirrels/package_data/base_project/docker/compose.yml +1 -1
  34. squirrels/package_data/base_project/env.yml +2 -2
  35. squirrels/package_data/base_project/models/dbviews/{database_view1.py → dbview_example.py} +2 -1
  36. squirrels/package_data/base_project/models/dbviews/{database_view1.sql → dbview_example.sql} +3 -2
  37. squirrels/package_data/base_project/models/federates/{dataset_example.py → federate_example.py} +6 -6
  38. squirrels/package_data/base_project/models/federates/{dataset_example.sql → federate_example.sql} +1 -1
  39. squirrels/package_data/base_project/parameters.yml +6 -4
  40. squirrels/package_data/base_project/pyconfigs/auth.py +1 -1
  41. squirrels/package_data/base_project/pyconfigs/connections.py +1 -1
  42. squirrels/package_data/base_project/pyconfigs/context.py +38 -10
  43. squirrels/package_data/base_project/pyconfigs/parameters.py +15 -7
  44. squirrels/package_data/base_project/squirrels.yml.j2 +14 -7
  45. squirrels/package_data/templates/index.html +3 -3
  46. squirrels/parameter_options.py +103 -106
  47. squirrels/parameters.py +347 -195
  48. squirrels/project.py +378 -0
  49. squirrels/user_base.py +14 -6
  50. {squirrels-0.3.2.dist-info → squirrels-0.4.0.dist-info}/METADATA +12 -23
  51. squirrels-0.4.0.dist-info/RECORD +60 -0
  52. squirrels/_timer.py +0 -23
  53. squirrels-0.3.2.dist-info/RECORD +0 -56
  54. {squirrels-0.3.2.dist-info → squirrels-0.4.0.dist-info}/LICENSE +0 -0
  55. {squirrels-0.3.2.dist-info → squirrels-0.4.0.dist-info}/WHEEL +0 -0
  56. {squirrels-0.3.2.dist-info → squirrels-0.4.0.dist-info}/entry_points.txt +0 -0
squirrels/_manifest.py CHANGED
@@ -1,141 +1,63 @@
1
- from typing import Optional
2
- from dataclasses import dataclass, field
1
+ from typing import Any
2
+ from typing_extensions import Self
3
3
  from enum import Enum
4
- import yaml
4
+ from pydantic import BaseModel, Field, field_validator, model_validator, ValidationInfo, ValidationError
5
+ import yaml, time
5
6
 
6
- from . import _constants as c, _utils as u
7
- from ._environcfg import EnvironConfigIO
8
- from ._timer import timer, time
7
+ from . import _constants as c, _utils as _u
8
+ from ._environcfg import EnvironConfig
9
9
 
10
10
 
11
- @dataclass
12
- class ManifestComponentConfig:
13
- @classmethod
14
- def _validate_required(cls, data: dict, required_keys: list[str], section: str):
15
- for key in required_keys:
16
- if key not in data:
17
- raise u.ConfigurationError(f'In {c.MANIFEST_FILE}, required field missing in {section}: {key}')
18
-
19
- @classmethod
20
- def from_dict(cls, kwargs: dict):
21
- return cls()
11
+ class ProjectVarsConfig(BaseModel, extra="allow"):
12
+ name: str
13
+ label: str = ""
14
+ major_version: int
22
15
 
16
+ @model_validator(mode="after")
17
+ def finalize_label(self) -> Self:
18
+ if self.label == "":
19
+ self.label = self.name
20
+ return self
23
21
 
24
- @dataclass
25
- class ProjectVarsConfig(ManifestComponentConfig):
26
- data: dict
27
22
 
28
- def __post_init__(self):
29
- required_keys = [c.PROJECT_NAME_KEY, c.MAJOR_VERSION_KEY]
30
- self._validate_required(self.data, required_keys, c.PROJ_VARS_KEY)
31
-
32
- integer_keys = [c.MAJOR_VERSION_KEY]
33
- for key in integer_keys:
34
- if key in self.data and not isinstance(self.data[key], int):
35
- raise u.ConfigurationError(f'Project variable "{key}" must be an integer')
36
-
37
- @classmethod
38
- def from_dict(cls, kwargs: dict):
39
- return cls(kwargs)
40
-
41
- def get_name(self) -> str:
42
- return str(self.data[c.PROJECT_NAME_KEY])
43
-
44
- def get_label(self) -> str:
45
- return str(self.data.get(c.PROJECT_LABEL_KEY, self.get_name()))
46
-
47
- def get_major_version(self) -> int:
48
- return self.data[c.MAJOR_VERSION_KEY]
49
-
50
-
51
- @dataclass
52
- class PackageConfig(ManifestComponentConfig):
53
- git_url: str
54
- directory: str
23
+ class PackageConfig(BaseModel):
24
+ git: str
55
25
  revision: str
26
+ directory: str = ""
56
27
 
57
- @classmethod
58
- def from_dict(cls, kwargs: dict):
59
- cls._validate_required(kwargs, [c.PACKAGE_GIT_KEY, c.PACKAGE_REVISION_KEY], c.PACKAGES_KEY)
60
- git_url = str(kwargs[c.PACKAGE_GIT_KEY])
61
- directory_raw = kwargs.get(c.PACKAGE_DIRECTORY_KEY)
62
- directory = git_url.split('/')[-1].removesuffix('.git') if directory_raw is None else str(directory_raw)
63
- revision = str(kwargs[c.PACKAGE_REVISION_KEY])
64
- return cls(git_url, directory, revision)
28
+ @model_validator(mode="after")
29
+ def finalize_directory(self) -> Self:
30
+ if self.directory == "":
31
+ self.directory = self.git.split('/')[-1].removesuffix('.git')
32
+ return self
65
33
 
66
34
 
67
- @dataclass
68
- class DbConnConfig(ManifestComponentConfig):
35
+ class _ConfigWithNameBaseModel(BaseModel):
69
36
  name: str
37
+
38
+
39
+ class DbConnConfig(_ConfigWithNameBaseModel):
40
+ credential: str | None = None
70
41
  url: str
71
42
 
72
- @classmethod
73
- def from_dict(cls, kwargs: dict):
74
- cls._validate_required(kwargs, [c.DB_CONN_NAME_KEY, c.DB_CONN_URL_KEY], c.DB_CONNECTIONS_KEY)
75
- name = str(kwargs[c.DB_CONN_NAME_KEY])
76
- credential_key = kwargs.get(c.DB_CONN_CRED_KEY)
77
- username, password = EnvironConfigIO.obj.get_credential(credential_key)
78
- url = str(kwargs[c.DB_CONN_URL_KEY]).format(username=username, password=password)
79
- return cls(name, url)
43
+ def finalize_url(self, base_path: str, env_cfg: EnvironConfig) -> Self:
44
+ username, password = env_cfg.get_credential(self.credential)
45
+ self.url = self.url.format(username=username, password=password, project_path=base_path)
46
+ return self
80
47
 
81
48
 
82
- @dataclass
83
- class ParametersConfig(ManifestComponentConfig):
49
+ class ParametersConfig(BaseModel):
84
50
  type: str
85
51
  factory: str
86
- arguments: dict
87
-
88
- @classmethod
89
- def from_dict(cls, kwargs: dict):
90
- all_keys = [c.PARAMETER_TYPE_KEY, c.PARAMETER_FACTORY_KEY, c.PARAMETER_ARGS_KEY]
91
- cls._validate_required(kwargs, all_keys, c.PARAMETERS_KEY)
92
- args = {key: kwargs[key] for key in all_keys}
93
- return cls(**args)
94
-
95
-
96
- @dataclass
97
- class TestSetsConfig(ManifestComponentConfig):
98
- name: str
99
- datasets: Optional[list[str]] = None
100
- is_authenticated: bool = False
101
- user_attributes: dict = field(default_factory=dict)
102
- parameters: dict = field(default_factory=dict)
52
+ arguments: dict[str, Any]
103
53
 
104
- @classmethod
105
- def from_dict(cls, kwargs: dict):
106
- cls._validate_required(kwargs, [c.TEST_SET_NAME_KEY], c.TEST_SETS_KEY)
107
- name = str(kwargs[c.TEST_SET_NAME_KEY])
108
- datasets = kwargs.get(c.TEST_SET_DATASETS_KEY)
109
- is_authenticated = (c.TEST_SET_USER_ATTR_KEY in kwargs)
110
- user_attributes = kwargs.get(c.TEST_SET_USER_ATTR_KEY, {})
111
- parameters = kwargs.get(c.TEST_SET_PARAMETERS_KEY, {})
112
- return cls(name, datasets, is_authenticated, user_attributes, parameters)
113
-
114
-
115
- @dataclass
116
- class DbviewConfig(ManifestComponentConfig):
117
- name: str
118
- connection_name: Optional[str]
119
54
 
120
- @classmethod
121
- def from_dict(cls, kwargs: dict):
122
- cls._validate_required(kwargs, [c.DBVIEW_NAME_KEY], c.DBVIEWS_KEY)
123
- name = str(kwargs[c.DBVIEW_NAME_KEY])
124
- connection_name = str(kwargs.get(c.DBVIEW_CONN_KEY))
125
- return cls(name, connection_name)
55
+ class DbviewConfig(_ConfigWithNameBaseModel):
56
+ connection_name: str | None = None
126
57
 
127
58
 
128
- @dataclass
129
- class FederateConfig(ManifestComponentConfig):
130
- name: str
131
- materialized: Optional[str]
132
-
133
- @classmethod
134
- def from_dict(cls, kwargs: dict):
135
- cls._validate_required(kwargs, [c.FEDERATE_NAME_KEY], c.FEDERATES_KEY)
136
- name = str(kwargs[c.FEDERATE_NAME_KEY])
137
- materialized = str(kwargs.get(c.MATERIALIZED_KEY))
138
- return cls(name, materialized)
59
+ class FederateConfig(_ConfigWithNameBaseModel):
60
+ materialized: str | None = None
139
61
 
140
62
 
141
63
  class DatasetScope(Enum):
@@ -143,102 +65,153 @@ class DatasetScope(Enum):
143
65
  PROTECTED = 1
144
66
  PRIVATE = 2
145
67
 
146
- @dataclass
147
- class DatasetsConfig(ManifestComponentConfig):
148
- name: str
149
- label: str
150
- model: str
151
- scope: DatasetScope
152
- parameters: list[str]
153
- traits: dict
154
- default_test_set: Optional[str]
155
68
 
69
+ class AnalyticsOutputConfig(_ConfigWithNameBaseModel):
70
+ label: str = ""
71
+ description: str = ""
72
+ scope: DatasetScope = DatasetScope.PUBLIC
73
+ parameters: list[str] = Field(default_factory=list)
74
+
75
+ @model_validator(mode="after")
76
+ def finalize_label(self) -> Self:
77
+ if self.label == "":
78
+ self.label = self.name
79
+ return self
80
+
81
+ @field_validator("scope", mode="before")
156
82
  @classmethod
157
- def from_dict(cls, kwargs: dict):
158
- cls._validate_required(kwargs, [c.DATASET_NAME_KEY], c.DATASETS_KEY)
159
- name = str(kwargs[c.DATASET_NAME_KEY])
160
- label = str(kwargs.get(c.DATASET_LABEL_KEY, name))
161
- model = str(kwargs.get(c.DATASET_MODEL_KEY, name))
162
- scope_raw = kwargs.get(c.DATASET_SCOPE_KEY)
83
+ def validate_scope(cls, value: str, info: ValidationInfo) -> DatasetScope:
163
84
  try:
164
- scope = DatasetScope[str(scope_raw).upper()] if scope_raw is not None else DatasetScope.PUBLIC
85
+ return DatasetScope[str(value).upper()]
165
86
  except KeyError as e:
87
+ name = info.data.get("name")
166
88
  scope_list = [scope.name.lower() for scope in DatasetScope]
167
- raise u.ConfigurationError(f'Scope not found for dataset "{name}". Scope must be one of {scope_list}') from e
168
-
169
- parameters = kwargs.get(c.DATASET_PARAMETERS_KEY, [])
170
- traits = kwargs.get(c.DATASET_TRAITS_KEY, {})
171
- default_test_set = kwargs.get(c.DATASET_DEFAULT_TEST_SET_KEY)
172
- return cls(name, label, model, scope, parameters, traits, default_test_set)
89
+ raise ValueError(f'Scope "{value}" is invalid for dataset/dashboard "{name}". Scope must be one of {scope_list}') from e
173
90
 
174
91
 
175
- @dataclass
176
- class _ManifestConfig:
177
- project_variables: ProjectVarsConfig
178
- packages: list[PackageConfig]
179
- connections: dict[str, DbConnConfig]
180
- parameters: list[ParametersConfig]
181
- selection_test_sets: dict[str, TestSetsConfig]
182
- dbviews: dict[str, DbviewConfig]
183
- federates: dict[str, FederateConfig]
184
- datasets: dict[str, DatasetsConfig]
185
- settings: dict
92
+ class DatasetConfig(AnalyticsOutputConfig):
93
+ model: str = ""
94
+ traits: dict = Field(default_factory=dict)
95
+ default_test_set: str = ""
96
+
97
+ def __hash__(self) -> int:
98
+ return hash("dataset_"+self.name)
186
99
 
187
- @classmethod
188
- def _create_configs_as_dict(cls, config_cls: ManifestComponentConfig, kwargs: dict, section_key: str, name_key: str) -> dict:
189
- configs_dict = {}
190
- for x in kwargs.get(section_key, []):
191
- name = x[name_key]
192
- if name in configs_dict:
193
- raise u.ConfigurationError(f'In the "{section_key}" section of {c.MANIFEST_FILE}, the name/identifier "{name}" was specified multiple times')
194
- configs_dict[name] = config_cls.from_dict(x)
195
- return configs_dict
100
+ @model_validator(mode="after")
101
+ def finalize_model(self) -> Self:
102
+ if self.model == "":
103
+ self.model = self.name
104
+ return self
196
105
 
197
- @classmethod
198
- def from_dict(cls, kwargs: dict):
199
- settings: dict = kwargs.get(c.SETTINGS_KEY, {})
200
106
 
201
- try:
202
- proj_vars = ProjectVarsConfig(kwargs[c.PROJ_VARS_KEY])
203
- except KeyError as e:
204
- raise u.ConfigurationError(f'In {c.MANIFEST_FILE}, section for {c.PROJ_VARS_KEY} is required') from e
205
-
206
- packages = [PackageConfig.from_dict(x) for x in kwargs.get(c.PACKAGES_KEY, [])]
207
- all_package_dirs = set()
208
- for package in packages:
209
- if package.directory in all_package_dirs:
210
- raise u.ConfigurationError(f'In the "{c.PACKAGES_KEY}" section of {c.MANIFEST_FILE}, multiple target directories found for "{package.directory}"')
211
- all_package_dirs.add(package.directory)
107
+ class DashboardConfig(AnalyticsOutputConfig):
108
+ def __hash__(self) -> int:
109
+ return hash("dashboard_"+self.name)
212
110
 
213
- db_conns = cls._create_configs_as_dict(DbConnConfig, kwargs, c.DB_CONNECTIONS_KEY, c.DB_CONN_NAME_KEY)
214
- params = [ParametersConfig.from_dict(x) for x in kwargs.get(c.PARAMETERS_KEY, [])]
215
111
 
216
- test_sets = cls._create_configs_as_dict(TestSetsConfig, kwargs, c.TEST_SETS_KEY, c.TEST_SET_NAME_KEY)
217
- dbviews = cls._create_configs_as_dict(DbviewConfig, kwargs, c.DBVIEWS_KEY, c.DBVIEW_NAME_KEY)
218
- federates = cls._create_configs_as_dict(FederateConfig, kwargs, c.FEDERATES_KEY, c.FEDERATE_NAME_KEY)
219
- datasets = cls._create_configs_as_dict(DatasetsConfig, kwargs, c.DATASETS_KEY, c.DATASET_NAME_KEY)
112
+ class TestSetsConfig(_ConfigWithNameBaseModel):
113
+ datasets: list[str] | None = None
114
+ is_authenticated: bool = False
115
+ user_attributes: dict[str, Any] = Field(default_factory=dict)
116
+ parameters: dict[str, Any] = Field(default_factory=dict)
220
117
 
221
- return cls(proj_vars, packages, db_conns, params, test_sets, dbviews, federates, datasets, settings)
118
+ @model_validator(mode="after")
119
+ def finalize_is_authenticated(self) -> Self:
120
+ if len(self.user_attributes) > 0:
121
+ self.is_authenticated = True
122
+ return self
123
+
124
+
125
+ class Settings(BaseModel):
126
+ data: dict[str, Any]
127
+
128
+ def get_default_connection_name(self) -> str:
129
+ return self.data.get(c.DB_CONN_DEFAULT_USED_SETTING, c.DEFAULT_DB_CONN)
222
130
 
223
- def get_default_test_set(self, dataset_name: str) -> tuple[str, dict]:
224
- default_1 = self.datasets[dataset_name].default_test_set
225
- default_2 = self.settings.get(c.TEST_SET_DEFAULT_USED_SETTING, c.DEFAULT_TEST_SET_NAME)
226
- default_name = default_1 if default_1 is not None else default_2
227
- default_test_set = self.selection_test_sets.get(default_name, TestSetsConfig.from_dict({c.TEST_SET_NAME_KEY: default_name}))
228
- return default_name, default_test_set
131
+ def do_use_duckdb(self) -> bool:
132
+ return self.data.get(c.IN_MEMORY_DB_SETTING, c.SQLITE) == c.DUCKDB
133
+
134
+
135
+ class ManifestConfig(BaseModel):
136
+ env_cfg: EnvironConfig
137
+ project_variables: ProjectVarsConfig
138
+ packages: list[PackageConfig] = Field(default_factory=list)
139
+ connections: dict[str, DbConnConfig] = Field(default_factory=dict)
140
+ parameters: list[ParametersConfig] = Field(default_factory=list)
141
+ selection_test_sets: dict[str, TestSetsConfig] = Field(default_factory=dict)
142
+ dbviews: dict[str, DbviewConfig] = Field(default_factory=dict)
143
+ federates: dict[str, FederateConfig] = Field(default_factory=dict)
144
+ datasets: dict[str, DatasetConfig] = Field(default_factory=dict)
145
+ dashboards: dict[str, DashboardConfig] = Field(default_factory=dict)
146
+ settings: dict[str, Any] = Field(default_factory=dict)
147
+ base_path: str = "."
148
+
149
+ @field_validator("packages")
150
+ @classmethod
151
+ def package_directories_are_unique(cls, packages: list[PackageConfig]) -> list[PackageConfig]:
152
+ set_of_directories = set()
153
+ for package in packages:
154
+ if package.directory in set_of_directories:
155
+ raise ValueError(f'In the packages section, multiple target directories found for "{package.directory}"')
156
+ set_of_directories.add(package.directory)
157
+ return packages
158
+
159
+ @field_validator("connections", "selection_test_sets", "dbviews", "federates", "datasets", "dashboards", mode="before")
160
+ @classmethod
161
+ def names_are_unique(cls, values: list[dict] | dict[str, dict], info: ValidationInfo) -> dict[str, dict]:
162
+ if isinstance(values, list):
163
+ values_as_dict = {}
164
+ for obj in values:
165
+ name = obj["name"]
166
+ if name in values_as_dict:
167
+ raise ValueError(f'In the {info.field_name} section, the name "{name}" was specified multiple times')
168
+ values_as_dict[name] = obj
169
+ else:
170
+ values_as_dict = values
171
+ return values_as_dict
172
+
173
+ @model_validator(mode="after")
174
+ def finalize_connections(self) -> Self:
175
+ for conn in self.connections.values():
176
+ conn.finalize_url(self.base_path, self.env_cfg)
177
+ return self
178
+
179
+ @property
180
+ def settings_obj(self) -> Settings:
181
+ return Settings(data=self.settings)
182
+
183
+ def get_default_test_set(self, dataset_name: str) -> TestSetsConfig:
184
+ """
185
+ Raises KeyError if dataset name doesn't exist
186
+ """
187
+ default_name_1 = self.datasets[dataset_name].default_test_set
188
+ default_name_2 = self.settings.get(c.TEST_SET_DEFAULT_USED_SETTING, c.DEFAULT_TEST_SET_NAME)
189
+ default_name = default_name_1 if default_name_1 else default_name_2
190
+ default_test_set = self.selection_test_sets.get(default_name, TestSetsConfig(name=default_name))
191
+ return default_test_set
192
+
193
+ def get_applicable_test_sets(self, dataset: str) -> list[str]:
194
+ applicable_test_sets = []
195
+ for test_set_name, test_set_config in self.selection_test_sets.items():
196
+ if test_set_config.datasets is None or dataset in test_set_config.datasets:
197
+ applicable_test_sets.append(test_set_name)
198
+ return applicable_test_sets
229
199
 
230
200
 
231
201
  class ManifestIO:
232
- obj: _ManifestConfig
233
202
 
234
203
  @classmethod
235
- def LoadFromFile(cls) -> None:
236
- EnvironConfigIO.LoadFromFile()
237
-
204
+ def load_from_file(cls, logger: _u.Logger, base_path: str, env_cfg: EnvironConfig) -> ManifestConfig:
238
205
  start = time.time()
239
- raw_content = u.read_file(c.MANIFEST_FILE)
240
- env_vars = EnvironConfigIO.obj.get_all_env_vars()
241
- content = u.render_string(raw_content, env_vars=env_vars, **env_vars) # TODO: deprecate **env_vars
242
- proj_config = yaml.safe_load(content)
243
- cls.obj = _ManifestConfig.from_dict(proj_config)
244
- timer.add_activity_time(f"loading {c.MANIFEST_FILE} file", start)
206
+
207
+ raw_content = _u.read_file(_u.Path(base_path, c.MANIFEST_FILE))
208
+ env_vars = env_cfg.get_all_env_vars()
209
+ content = _u.render_string(raw_content, base_path=base_path, env_vars=env_vars)
210
+ manifest_content = yaml.safe_load(content)
211
+ try:
212
+ manifest_cfg = ManifestConfig(base_path=base_path, env_cfg=env_cfg, **manifest_content)
213
+ except ValidationError as e:
214
+ raise _u.ConfigurationError(f"Failed to process {c.MANIFEST_FILE} file. " + str(e)) from e
215
+
216
+ logger.log_activity_time(f"loading {c.MANIFEST_FILE} file", start)
217
+ return manifest_cfg