tundri 1.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tundri/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ from dotenv import load_dotenv
2
+
3
+ load_dotenv()
tundri/cli.py ADDED
@@ -0,0 +1,98 @@
1
+ import argparse
2
+ import logging
3
+ import sys
4
+
5
+ from rich.console import Console
6
+ from rich.logging import RichHandler
7
+
8
+ from tundri.core import drop_create_objects
9
+ from tundri.utils import (
10
+ run_command,
11
+ log_dry_run_info,
12
+ load_env_var
13
+ )
14
+
15
+
16
+ logging.basicConfig(
17
+ level="WARN", format="%(message)s", datefmt="[%X]", handlers=[RichHandler()]
18
+ )
19
+ log = logging.getLogger(__name__)
20
+ log.setLevel("INFO")
21
+ console = Console()
22
+
23
+
24
def drop_create(args):
    """Run the drop/create workflow; exit with status 1 if it fails."""
    console.log("[bold][purple]Drop/create Snowflake objects[/purple] started[/bold]")
    if args.dry:
        log_dry_run_info()
    succeeded = drop_create_objects(args.permifrost_spec_path, args.dry)
    if not succeeded:
        sys.exit(1)
    console.log(
        "[bold][purple]\nDrop/create Snowflake objects[/purple] completed successfully[/bold]\n"
    )
35
+
36
+
37
def permifrost(args):
    """Run Permifrost against the given spec file, honoring dry-run mode."""
    console.log("[bold][purple]Permifrost[/purple] started[/bold]")
    cmd = [
        "permifrost",
        "run",
        args.permifrost_spec_path,
        "--ignore-missing-entities-dry-run",
    ]

    if args.dry:
        cmd.append("--dry")
        log_dry_run_info()

    console.log(f"Running command: \n[italic]{' '.join(cmd)}[/italic]\n")
    run_command(cmd)
    # Fix: the original closed the rich tag with "[bold]" instead of "[/bold]",
    # leaving bold styling open for subsequent console output.
    console.log("[bold][purple]Permifrost[/purple] completed successfully[/bold]\n")
53
+
54
+
55
def run(args):
    """Run the full workflow: drop/create objects, then apply permissions with Permifrost."""
    drop_create(args)
    permifrost(args)
58
+
59
+
60
def _add_spec_and_dry_args(subparser):
    """Attach the arguments shared by every subcommand (spec path and --dry)."""
    subparser.add_argument(
        "-p", "--permifrost_spec_path", "--filepath", required=True
    )
    subparser.add_argument("--dry", action="store_true", help="Run in dry mode")


def main():
    """CLI entry point: parse arguments and dispatch to the chosen subcommand.

    Fixes: the common argument setup was copy-pasted three times, and the same
    variable name `parser_drop_create` was reused for all three subparsers.
    """
    parser = argparse.ArgumentParser(
        description="tundri - Drop, create and alter Snowflake objects and set permissions with Permifrost"
    )
    subparsers = parser.add_subparsers()

    # Drop/create functionality
    parser_drop_create = subparsers.add_parser(
        "drop_create", help="Drop, create and alter Snowflake objects"
    )
    _add_spec_and_dry_args(parser_drop_create)
    parser_drop_create.set_defaults(func=drop_create)

    # Permifrost functionality
    parser_permifrost = subparsers.add_parser("permifrost", help="Run Permifrost")
    _add_spec_and_dry_args(parser_permifrost)
    parser_permifrost.set_defaults(func=permifrost)

    # Run both
    parser_run = subparsers.add_parser(
        "run", help="Run drop_create and then permifrost"
    )
    _add_spec_and_dry_args(parser_run)
    parser_run.set_defaults(func=run)

    args = parser.parse_args()
    # Loading .env here, because function needs access to the path to config .yml, as
    # the .env is expected to live in the same directory as the .yml
    load_env_var(args.permifrost_spec_path)
    args.func(args)


if __name__ == "__main__":
    main()
tundri/constants.py ADDED
@@ -0,0 +1,47 @@
1
+ import os
2
+
3
+ from tundri.objects import (
4
+ Warehouse,
5
+ Database,
6
+ User,
7
+ Role,
8
+ Schema,
9
+ )
10
+
11
+
12
# Env var that, when set, overrides the default role for every entry in
# OBJECT_ROLE_MAP and for INSPECTOR_ROLE below.
ENV_VAR_OVERRIDE_ROLE = "TUNDRI_DROP_CREATE_ROLE"

# Maps object-type names (as used in the Permifrost spec) to their classes.
OBJECT_TYPE_MAP = {
    "warehouse": Warehouse,
    "database": Database,
    "user": User,
    "role": Role,
    "schema": Schema,
}

# Canonical list of supported object type names.
OBJECT_TYPES = list(OBJECT_TYPE_MAP.keys())

# For backwards compatibility, we allow setting the role via environment variables
# so previous setups that used `permifrost` role will continue to work
OBJECT_ROLE_MAP = {
    "warehouse": os.getenv(ENV_VAR_OVERRIDE_ROLE, "SYSADMIN"),
    "database": os.getenv(ENV_VAR_OVERRIDE_ROLE, "SYSADMIN"),
    "schema": os.getenv(ENV_VAR_OVERRIDE_ROLE, "SYSADMIN"),
    "user": os.getenv(ENV_VAR_OVERRIDE_ROLE, "SECURITYADMIN"),
    "role": os.getenv(ENV_VAR_OVERRIDE_ROLE, "SECURITYADMIN"),
}
# SYSADMIN can't see objects after ownership is transferred,
# so we need to always use SECURITYADMIN for inspection
INSPECTOR_ROLE = os.getenv(ENV_VAR_OVERRIDE_ROLE, "SECURITYADMIN")

# Snowflake built-in roles that must never be created or dropped (lowercase).
SYSTEM_DEFINED_ROLES = [
    "accountadmin",
    "securityadmin",
    "sysadmin",
    "useradmin",
    "orgadmin",
]

# Parameters whose string values keep their original casing when normalizing
# metadata (see utils.format_metadata_value); everything else is lowercased.
STRING_CASING_CONVERSION_MAP = {
    "rsa_public_key": str,  # Keep case
}
tundri/core.py ADDED
@@ -0,0 +1,278 @@
1
+ import logging
2
+ import os
3
+ from typing import FrozenSet, Dict, List
4
+
5
+ from rich.console import Console
6
+ from rich.logging import RichHandler
7
+ from rich.prompt import Prompt
8
+ from yaml import load, Loader
9
+
10
+ from tundri.constants import (
11
+ OBJECT_TYPES,
12
+ OBJECT_ROLE_MAP,
13
+ SYSTEM_DEFINED_ROLES,
14
+ )
15
+ from tundri.inspector import inspect_object_type
16
+ from tundri.objects import SnowflakeObject
17
+ from tundri.parser import parse_object_type
18
+ from tundri.utils import (
19
+ get_configs,
20
+ get_snowflake_cursor,
21
+ format_params,
22
+ )
23
+
24
+
25
+ all_ddl_statements = {object_type: None for object_type in OBJECT_TYPES}
26
+
27
+ drop_template = "USE ROLE {role};DROP {object_type} {name};"
28
+ create_template = "USE ROLE {role};CREATE {object_type} {name} {extra_sql};"
29
+ alter_template = "USE ROLE {role};ALTER {object_type} {name} SET {parameters};"
30
+
31
+ objects_to_ignore_in_alter = {"user": ["snowflake"]}
32
+ params_to_ignore_in_alter = {
33
+ "user": ["password", "must_change_password"],
34
+ "warehouse": ["initially_suspended", "statement_timeout_in_seconds"],
35
+ }
36
+
37
+
38
+ logging.basicConfig(
39
+ level="WARN", format="%(message)s", datefmt="[%X]", handlers=[RichHandler()]
40
+ )
41
+ log = logging.getLogger(__name__)
42
+ log.setLevel("INFO")
43
+ console = Console()
44
+
45
+ # Compatible with GitHub, GitLab and Bitbucket
46
+ IS_CI_RUN = os.getenv("CI") == "true"
47
+
48
+
49
def build_statements_list(
    statements: Dict, object_types: List[str] = OBJECT_TYPES
) -> List:
    """Flatten the per-type/per-operation statements dict into one ordered list.

    The input has the structure:
        {
            "user": {
                "drop": ["USE ROLE ...;DROP USER ...;", ...],
                "create": [...],
                "alter": [...],
            },
            ...
        }

    Each entry is assumed to be a "USE ROLE ...;<DDL> ...;" pair; pairs are
    split on ";" and emitted as individual statements — drops first, then
    creates, then alters — for each object type in turn.

    Args:
        statements: dict with the list of statements of each type
        object_types: object types to process, defaults to OBJECT_TYPES

    Returns:
        Ordered list of individual DDL statements.
    """
    flattened = []
    for obj_type in object_types:
        for operation in ("drop", "create", "alter"):  # Order matters
            for pair in statements[obj_type][operation]:
                # Drop the empty string produced by the trailing ";".
                flattened.extend(
                    part.strip() for part in pair.split(";") if part
                )
    return flattened
83
+
84
+
85
def print_ddl_statements(statements: Dict) -> None:
    """Log the DDL statements about to run, hiding the USE ROLE switches."""
    if not statements:
        console.log(
            "No statements to execute (the state of Snowflake objects matches the Permifrost spec)\n"
        )
        return
    # Role switches are an implementation detail; only show the actual DDL.
    for statement in statements:
        if not statement.startswith("USE ROLE"):
            console.log(f"[italic]- {statement}[/italic]")
    console.log()
97
+
98
+
99
def execute_ddl(cursor, statements: List) -> None:
    """Execute drop, create and alter statements in sequence for each object type.

    Args:
        cursor: Snowflake API cursor object
        statements: drop, create and alter statements, already ordered
    """
    console.log("\n[bold]Executing DDL statements[/bold]:")
    for statement in statements:
        cursor.execute(statement)
        # Role switches are executed but not echoed, to keep the log readable.
        if not statement.startswith("USE ROLE"):
            console.log(f"[green]\u2713[/green] [italic]{statement}[/italic]")
113
+
114
+
115
def ignore_system_defined_roles(
    objects: FrozenSet[SnowflakeObject],
) -> FrozenSet[SnowflakeObject]:
    """Remove system-defined roles so we never try to create or drop them."""

    def _is_system_role(obj):
        return obj.type == "role" and obj.name in SYSTEM_DEFINED_ROLES

    return frozenset(obj for obj in objects if not _is_system_role(obj))
126
+
127
+
128
def resolve_objects(
    existing_objects: FrozenSet[SnowflakeObject],
    ought_objects: FrozenSet[SnowflakeObject],
) -> Dict:
    """Prepare DROP, CREATE and ALTER statements for one object type.

    Args:
        existing_objects: Set of Snowflake objects that currently exist
        ought_objects: Set of Snowflake objects that are expected to exist

    Returns:
        ddl_statements: dict with "drop", "create" and "alter" keys, each a
        list of DDL statements to be executed for the given object type
    """
    ddl_statements = {
        "drop": [],
        "create": [],
        "alter": [],
    }

    # Infer the object type from whichever set is non-empty. Fix: the original
    # indexed list(existing_objects)[0] unconditionally, which raised
    # IndexError when no object of this type existed in Snowflake yet.
    if existing_objects:
        object_type = next(iter(existing_objects)).type
    elif ought_objects:
        object_type = next(iter(ought_objects)).type
    else:
        return ddl_statements  # Nothing exists and nothing ought to: no work
    console.log(f"Resolving {object_type} objects")

    role = OBJECT_ROLE_MAP[object_type]

    # Check which objects to drop/create/keep
    objects_to_drop = existing_objects.difference(ought_objects)
    if object_type == "schema":  # Schemas should not be dropped
        objects_to_drop = frozenset()
    objects_to_create = ought_objects.difference(existing_objects)
    objects_to_keep = ought_objects.intersection(existing_objects)

    # Remove create or drop statements for system-defined roles
    objects_to_create = ignore_system_defined_roles(objects_to_create)
    objects_to_drop = ignore_system_defined_roles(objects_to_drop)

    # Prepare CREATE/DROP statements
    ddl_statements["create"] = [
        create_template.format(
            role=role,
            object_type=object_type,
            name=obj.name,
            extra_sql=format_params(obj.params),
        ).strip()
        for obj in objects_to_create
    ]
    ddl_statements["drop"] = [
        drop_template.format(role=role, object_type=object_type, name=obj.name)
        for obj in objects_to_drop
    ]

    # Prepare ALTER statements: pair up the kept objects by name (sorting uses
    # the case-insensitive __lt__ of SnowflakeObject) and diff their params.
    existing_objects_to_keep = sorted(
        [obj for obj in existing_objects if obj in objects_to_keep]
    )
    ought_objects_to_keep = sorted(
        [obj for obj in ought_objects if obj in objects_to_keep]
    )

    for existing, ought in zip(existing_objects_to_keep, ought_objects_to_keep):
        assert (
            existing == ought
        )  # Leverages custom __eq__ implementation to compare name and type
        if not ought.params:
            continue
        if existing.params == ought.params:
            continue

        # NOTE(review): this mutates ought.params in place; the parsed objects
        # are not reused after this call in the current code paths, so this is
        # presently harmless — confirm before reusing them elsewhere.
        for p in params_to_ignore_in_alter.get(object_type, list()):
            ought.params.pop(p, None)

        ought_params_set = set(ought.params.items())
        existing_params_set = set(existing.params.items())
        params_to_alter_set = ought_params_set.difference(existing_params_set)
        if not params_to_alter_set:
            continue
        if ought.name in objects_to_ignore_in_alter.get(object_type, list()):
            continue
        ddl_statements["alter"].append(
            alter_template.format(
                role=role,
                object_type=object_type,
                name=ought.name,
                parameters=format_params(dict(params_to_alter_set)),
            )
        )

    return ddl_statements
217
+
218
+
219
def drop_create_objects(permifrost_spec_path: str, is_dry_run: bool):
    """
    Drop and create Snowflake objects based on the Permifrost specification and
    inspection of Snowflake metadata.

    Args:
        permifrost_spec_path: path to the Permifrost specification file
        is_dry_run: flag to run the operation in dry-run mode

    Returns:
        bool: True if the operation was successful, False otherwise
    """
    # Fix: the original called open() without a context manager, leaking the
    # spec file handle.
    with open(permifrost_spec_path, "r") as spec_file:
        permifrost_spec = load(spec_file, Loader=Loader)

    for object_type in OBJECT_TYPES:
        existing_objects = inspect_object_type(object_type)
        ought_objects = parse_object_type(permifrost_spec, object_type)
        all_ddl_statements[object_type] = resolve_objects(
            existing_objects,
            ought_objects,
        )

    console.log("\n[bold]DDL statements to be executed[/bold]:")
    ddl_statements_seq = build_statements_list(all_ddl_statements)
    print_ddl_statements(ddl_statements_seq)
    drop_statements = [s for s in ddl_statements_seq if s.startswith("DROP")]

    if IS_CI_RUN:
        console.log(
            "[bold][yellow]CI run detected[/bold][/yellow]: Skipping manual confirmations"
        )

    # First confirmation: the operator must type the account name.
    if not is_dry_run and not IS_CI_RUN:
        configs = get_configs()
        console.log(
            f"\n[bold][blue]INFO[/bold][/blue]: Executing for Snowflake account: {configs['account']}"
        )
        user_input = Prompt.ask(
            f"\n\t>>> Type [bold]{configs['account']}[/bold] to proceed or any other key to abort"
        )
        if user_input.lower() != configs["account"].lower():
            console.log()
            console.log("Exited without executing any statements")
            return False

    # Second confirmation, only when destructive DROP statements are pending.
    if not is_dry_run and not IS_CI_RUN and drop_statements:
        console.log(
            f"\n[bold][red]WARNING[/bold][/red]: The following DROP statements are about to be executed: {(drop_statements)}"
        )
        user_input = Prompt.ask(
            "\n\t>>> Type [bold]drop[/bold] to proceed or any other key to abort"
        )
        if user_input.lower() != "drop":
            console.log()
            console.log("Exited without executing any statements")
            return False

    if not is_dry_run:
        execute_ddl(get_snowflake_cursor(), ddl_statements_seq)

    return True
tundri/inspector.py ADDED
@@ -0,0 +1,97 @@
1
+ from pprint import pprint
2
+ from typing import FrozenSet
3
+
4
+ from tundri.constants import OBJECT_TYPES, OBJECT_TYPE_MAP, INSPECTOR_ROLE
5
+ from tundri.objects import SnowflakeObject, Schema
6
+ from tundri.utils import plural, get_snowflake_cursor, format_metadata_value
7
+
8
+ # Column names of SHOW statement are different than parameter names in DDL statements
9
# Column names of SHOW statement are different than parameter names in DDL statements;
# this maps SHOW column name -> DDL parameter name, per object type.
parameter_name_map = {
    "warehouse": {
        "size": "warehouse_size",
        "type": "warehouse_type",
    },
}
15
+
16
+
17
def inspect_schemas() -> FrozenSet[Schema]:
    """Get schemas that exist based on Snowflake metadata.

    Returns:
        Set of `Schema` instances named "<DATABASE>.<SCHEMA>" (uppercase).
    """
    # Keys are databases and values are lists of schemas, e.g. {'ANALYTICS': ['REPORTING']}
    existing_schemas = {}
    with get_snowflake_cursor() as cursor:
        cursor.execute(f"USE ROLE {INSPECTOR_ROLE}")
        cursor.execute("SHOW SCHEMAS IN ACCOUNT")
        # In each SHOW SCHEMAS row, column 4 is the database, column 1 the schema.
        for row in cursor:
            database = row[4].upper()
            schema = row[1].upper()
            existing_schemas.setdefault(database, []).append(schema)

    qualified_names = [
        f"{database}.{schema}"
        for database, schemas in existing_schemas.items()
        for schema in schemas
    ]
    return frozenset(Schema(name=name) for name in qualified_names)
44
+
45
+
46
def inspect_object_type(object_type: str) -> FrozenSet[SnowflakeObject]:
    """Initialize Snowflake objects of a given type from Snowflake metadata.

    Args:
        object_type: Object type e.g. "database", "user", etc

    Returns:
        Set of instances of `SnowflakeObject` subclasses
    """
    # Schemas are nested in databases and need a dedicated inspection path.
    if object_type == "schema":
        return inspect_schemas()

    rename = parameter_name_map.get(object_type, dict())
    with get_snowflake_cursor() as cursor:
        cursor.execute(f"USE ROLE {INSPECTOR_ROLE}")
        cursor.execute(f"SHOW {plural(object_type)}")
        # Translate SHOW column names to their DDL parameter names.
        column_names = [rename.get(col[0], col[0]) for col in cursor.description]
        data = [
            {
                name: format_metadata_value(name, value)
                for name, value in zip(column_names, row)
            }
            for row in cursor
        ]

    inspected_objects = []
    for record in data:
        name = record.pop("name")
        # Ignore Snowflake system objects
        if name.startswith("system$"):
            continue
        inspected_objects.append(
            OBJECT_TYPE_MAP[object_type](name=name, params=record)
        )

    return frozenset(inspected_objects)
85
+
86
+
87
def run():
    """Ad-hoc helper: inspect warehouses and databases and pretty-print them."""
    inspected_objects = {plural(object_type): None for object_type in OBJECT_TYPES}

    for object_type in ("warehouse", "database"):
        inspected_objects[plural(object_type)] = inspect_object_type(object_type)

    pprint(inspected_objects)


if __name__ == "__main__":
    run()
tundri/objects.py ADDED
@@ -0,0 +1,74 @@
1
+ from dataclasses import dataclass, field
2
+ from typing import Dict, Tuple
3
+
4
+
5
@dataclass(frozen=True)
class SnowflakeObject:
    """Base class to represent Snowflake objects.

    It has customized behavior for equality checks, set operations and sort. This is
    done to allow for simpler comparisons between objects that exist vs. ought to exist.
    Equality checks ignore the parameters, which need to be checked using more complex logic.

    Attributes:
        type: object type, e.g. `database`, `warehouse`, etc
        name: object name, e.g. `raw` for a database or `load` for a warehouse
        params: dict with object parameters, e.g. for a user: {'default_warehouse': 'load'}
        required_params: tuple with values expected as keys of `params`
    """

    type: str = None
    name: str = None
    params: Dict = field(default_factory=dict)
    required_params: Tuple = field(default_factory=tuple)

    def __eq__(self, other):
        # Fix: return NotImplemented for foreign types instead of raising
        # AttributeError on `other.type` (e.g. `obj == "raw"` used to crash).
        if not isinstance(other, SnowflakeObject):
            return NotImplemented
        return (
            self.type.casefold() == other.type.casefold()
            and self.name.casefold() == other.name.casefold()
        )

    def __hash__(self):
        # Must agree with __eq__: case-insensitive (type, name), params ignored.
        return hash((self.type.casefold(), self.name.casefold()))

    def __lt__(self, other):
        # Case-insensitive sort by name (used to pair existing/ought objects).
        return self.name.casefold() < other.name.casefold()

    def get_missing_required_params(self):
        """Return the required parameter names absent from `params`."""
        if self.required_params and not self.params:
            return self.required_params
        return [key for key in self.required_params if key not in self.params.keys()]

    def check_required_params(self):
        """Return True when no required parameter is missing."""
        return not self.get_missing_required_params()
44
+
45
+
46
@dataclass(frozen=True, eq=False)
class Warehouse(SnowflakeObject):
    """Virtual warehouse; size and auto-suspend must be specified."""

    type: str = "warehouse"
    required_params: Tuple = ("warehouse_size", "auto_suspend")


@dataclass(frozen=True, eq=False)
class Database(SnowflakeObject):
    """Database object; no required parameters."""

    type: str = "database"


@dataclass(frozen=True, eq=False)
class Schema(SnowflakeObject):
    """Schema object; no required parameters."""

    type: str = "schema"


@dataclass(frozen=True, eq=False)
class Role(SnowflakeObject):
    """Role object; no required parameters."""

    type: str = "role"


@dataclass(frozen=True, eq=False)
class User(SnowflakeObject):
    """User object; a default role must be specified."""

    type: str = "user"
    required_params: Tuple = ("default_role",)
71
+
72
+
73
class ConfigurationValueError(ValueError):
    """Raised when the Permifrost spec is missing required object parameters."""
    pass
tundri/parser.py ADDED
@@ -0,0 +1,112 @@
1
+ from yaml import load, Loader
2
+ from pprint import pprint
3
+ from typing import FrozenSet
4
+
5
+ from tundri.constants import OBJECT_TYPES, OBJECT_TYPE_MAP
6
+ from tundri.objects import SnowflakeObject, Schema, ConfigurationValueError
7
+ from tundri.utils import plural, format_metadata_value
8
+
9
# Default spec used by the ad-hoc `run()` helper at the bottom of this module.
PERMIFROST_YAML_FILEPATH = "examples/permifrost.yml"
10
+
11
+
12
def _collect_schema(ought_schemas: dict, qualified_name: str) -> None:
    """Record "<db>.<schema>" into the {db: [schemas]} accumulator, skipping
    wildcard ("db.*") entries and avoiding duplicates."""
    database, schema_name = qualified_name.upper().split(".")
    if schema_name == "*":
        return
    schemas = ought_schemas.setdefault(database, [])
    if schema_name not in schemas:
        schemas.append(schema_name)


def parse_schemas(permifrost_spec: dict) -> FrozenSet[Schema]:
    """Get schemas that ought to exist based on specific role definitions.

    The way schemas are defined in Permifrost is different from the other objects. We
    need to infer which ones need to exist based on definitions of roles that should
    use/own them.

    Improvement: the original repeated the same accumulate-unless-wildcard
    logic three times (owns / read privileges / write privileges); it is
    factored into `_collect_schema`. The returned frozenset is unchanged.

    Args:
        permifrost_spec: Dict with contents from Permifrost YAML file

    Returns:
        parsed_objects: set of instances of `Schema` class
    """
    # Keys are databases and values are list of schemas e.g. {'ANALYTICS': ['REPORTING']}
    ought_schemas = {}
    for role in permifrost_spec["roles"]:
        role_name, permi_defs = list(role.items())[0]
        owns = permi_defs.get("owns") or {}
        for schema in owns.get("schemas") or []:
            _collect_schema(ought_schemas, schema)
        privileges = permi_defs.get("privileges") or {}
        schema_privileges = privileges.get("schemas") or {}
        for access in ("read", "write"):
            for schema in schema_privileges.get(access, []):
                _collect_schema(ought_schemas, schema)

    ought_schema_names = [
        f"{database}.{schema}"
        for database, schemas in ought_schemas.items()
        for schema in schemas
    ]
    return frozenset(Schema(name=name) for name in ought_schema_names)
58
+
59
+
60
def parse_object_type(
    permifrost_spec: dict, object_type: str
) -> FrozenSet[SnowflakeObject]:
    """Initialize Snowflake objects of a given type from Permifrost spec.

    Args:
        permifrost_spec: Dict with contents from Permifrost YAML file
        object_type: Object type e.g. "database", "user", etc

    Returns:
        parsed_objects: set of instances of `SnowflakeObject` subclasses

    Raises:
        ConfigurationValueError: when an object lacks required parameters
    """
    # Schemas are declared implicitly through role definitions, not as a list.
    if object_type == "schema":
        return parse_schemas(permifrost_spec)

    parsed_objects = []
    for entry in permifrost_spec.get(plural(object_type), []):
        # Each entry is a dict with a single key (its name) mapping to its spec
        object_name, object_spec = next(iter(entry.items()))
        # Use all contents of meta as DDL parameters, normalized in place
        params = object_spec.get("meta", dict())
        for name, value in params.items():
            params[name] = format_metadata_value(name, value)
        parsed_object = OBJECT_TYPE_MAP[object_type](
            name=object_name, params=params
        )
        if not parsed_object.check_required_params():
            raise ConfigurationValueError(
                f"Required parameters for object '{object_name}' of type '{object_type}' missing: {parsed_object.get_missing_required_params()}"
            )
        parsed_objects.append(parsed_object)

    return frozenset(parsed_objects)
95
+
96
+
97
def run():
    """Ad-hoc helper: parse the example spec and pretty-print every object type.

    Fix: the spec file is now opened with a context manager so the handle is
    closed deterministically (the original leaked it).
    """
    with open(PERMIFROST_YAML_FILEPATH, "r") as spec_file:
        permifrost_spec = load(spec_file, Loader=Loader)

    parsed_objects = {plural(object_type): None for object_type in OBJECT_TYPES}

    for object_type in OBJECT_TYPES:
        parsed_objects[plural(object_type)] = parse_object_type(
            permifrost_spec, object_type
        )

    pprint(parsed_objects)


if __name__ == "__main__":
    run()
tundri/utils.py ADDED
@@ -0,0 +1,208 @@
1
+ import logging
2
+ import os
3
+ import subprocess
4
+ from typing import Dict, Type, T
5
+ from pathlib import Path
6
+
7
+ from dotenv import load_dotenv, dotenv_values
8
+
9
+ from rich.console import Console
10
+ from rich.logging import RichHandler
11
+ from snowflake.connector import connect
12
+
13
+ from tundri.constants import STRING_CASING_CONVERSION_MAP
14
+
15
+
16
+ logging.basicConfig(
17
+ level="WARN", format="%(message)s", datefmt="[%X]", handlers=[RichHandler()]
18
+ )
19
+ log = logging.getLogger(__name__)
20
+ log.setLevel("INFO")
21
+ console = Console()
22
+
23
+ # Suppress urllib3 connection warnings from Snowflake connector
24
+ logging.getLogger("urllib3.connectionpool").setLevel(logging.ERROR)
25
+ logging.getLogger("snowflake.connector.vendored.urllib3.connectionpool").setLevel(logging.ERROR)
26
+
27
+
28
class ConfigurationError(Exception):
    """Raised when a required PERMISSION_BOT_* environment variable is missing."""
    pass
30
+
31
+
32
def load_env_var(path_to_env: str):
    """
    Loads environment variables from a dotenv file.

    Dotenv file has to live in the same directory as the Permifrost specifications
    file and must be named ".env". If an environment variable with the same name
    already exists on the system (e.g., from .bashrc), the existing variable is
    overwritten with the corresponding value from the dotenv file.

    :param path_to_env: Path to .env file
    :return: --
    """
    console.log("[bold][purple]Loading environment variables [/purple] started[/bold]")
    # Resolve to an absolute path, drop the spec filename and look for ".env"
    # next to the spec file.
    path_to_dotenv = Path(path_to_env).resolve().parent / ".env"

    console.log(f"Checking for [italic]{str(path_to_dotenv)}[/italic]")
    if not path_to_dotenv.is_file():
        console.log(f"Could not find dotenv file under {str(path_to_dotenv)}")
        console.log("Using system's environment variables instead")
        return

    console.log("Found dotenv file in directory; parsing")
    env_var = dotenv_values(path_to_dotenv)  # Dump the contents of .env in a variable
    if not env_var:
        console.log("Dotenv file is empty, nothing to parse")
        console.log("Using system's environment variables instead")
        return

    # Fix: only log variable NAMES. The original logged full KEY=VALUE pairs,
    # which leaked credentials (e.g. PERMISSION_BOT_PASSWORD) into the console.
    console.log("Loading the following environment variables from dotenv:")
    for key in env_var:
        console.log(key)
    # Fix: the original printed the "will be overwritten" header even when no
    # dotenv variable overlapped with the system environment.
    overridden = [key for key in env_var if os.environ.get(key) is not None]
    if overridden:
        console.log(
            "\nThe following environment variables already exist on the system " +
            "and will be overwritten with the contents of the dotenv file:"
        )
        for key in overridden:
            console.log(key)
    load_dotenv(path_to_dotenv, override=True)
74
+
75
+
76
def get_configs() -> Dict[str, str]:
    """Get the configuration from environment variables and validate them before returning"""
    config = {
        "user": os.getenv("PERMISSION_BOT_USER"),
        "password": os.getenv("PERMISSION_BOT_PASSWORD", ""),
        "account": os.getenv("PERMISSION_BOT_ACCOUNT"),
        "database": os.getenv("PERMISSION_BOT_DATABASE"),
        "role": os.getenv("PERMISSION_BOT_ROLE"),
        "warehouse": os.getenv("PERMISSION_BOT_WAREHOUSE"),
        "key_path": os.getenv("PERMISSION_BOT_KEY_PATH"),
        "key_passphrase": os.getenv("PERMISSION_BOT_KEY_PASSPHRASE"),
    }

    # These four must be present; role, password and key settings are optional.
    for key in ("account", "database", "user", "warehouse"):
        if not config[key]:
            raise ConfigurationError(
                f"The PERMISSION_BOT_{key.upper()} environment variable is not set"
            )

    return config
107
+
108
+
109
def get_snowflake_cursor():
    """Get a Snowflake cursor with support for private key authentication.

    Key-pair authentication is used when PERMISSION_BOT_KEY_PATH is set;
    otherwise password authentication is used.
    """
    config = get_configs()
    connection_params = {
        "user": config["user"],
        "account": config["account"],
        "warehouse": config["warehouse"],
        "database": config["database"],
    }

    if config["key_path"] is None:
        connection_params["password"] = config["password"]
    else:
        connection_params["private_key_file"] = config["key_path"]
        connection_params["private_key_file_pwd"] = config["key_passphrase"]

    return connect(**connection_params).cursor()
132
+
133
+
134
def plural(name: str) -> str:
    """Naive pluralization used to build SHOW statements and spec keys."""
    return name + "s"
136
+
137
+
138
def format_metadata_value(name: str, value):
    """
    Format metadata values read from the YAML file or Snowflake metadata

    Most values are converted to lowercase to simplify comparisons, but other parameters
    like `rsa_public_key` are treated differently as defined in `STRING_CASING_CONVERSION_MAP`
    """
    # Non-strings (ints, bools, None, ...) pass through untouched.
    if not isinstance(value, str):
        return value
    convert = STRING_CASING_CONVERSION_MAP.get(name, str.lower)
    normalized = convert(value.strip())
    # Boolean-looking strings become real booleans.
    if normalized.casefold() == "true":
        return True
    if normalized.casefold() == "false":
        return False
    return normalized
154
+
155
+
156
def format_params(params: Dict) -> str:
    """Return a formatted list of parameters for use as arguments in DDL statements.

    Integer- and boolean-looking values are emitted bare (NAME = 10); anything
    else is single-quoted (NAME = 'value').
    """

    def get_param_value_type(value) -> type:
        # Fix: the return annotation previously used `Type[T]` with `T`
        # imported from `typing`, which is not a public typing name.
        # NOTE(review): negative numbers are not matched by isdigit() and end
        # up quoted — confirm whether any Snowflake param can be negative.
        if not isinstance(value, str):
            value = str(value)
        if value.isdigit():
            return int
        if value.upper() in ["TRUE", "FALSE"]:
            return bool
        return str

    params_formatted = []
    templates = {
        int: "{name} = {value}",
        bool: "{name} = {value}",
        str: "{name} = '{value}'",
    }
    for name, value in params.items():
        value_type = get_param_value_type(value)
        # Workaround to ensure Snowflake accepts default_role, default_warehouse and
        # default_namespace when quoted as alter statement parameters
        if name.lower() in ["default_role", "default_warehouse", "default_namespace"]:
            value = value.upper()
        params_formatted.append(templates[value_type].format(name=name, value=value))
    return ", ".join(params_formatted)
182
+
183
+
184
def run_command(command):
    """Run *command* as a subprocess, streaming its stdout lines to the console.

    Args:
        command: argv-style list, e.g. ["permifrost", "run", ...]

    Returns:
        (output, errs): remaining stdout (usually empty, since stdout is
        consumed line by line above) and the captured stderr text.

    Raises:
        subprocess.CalledProcessError: when the process exits non-zero.
    """
    process = subprocess.Popen(
        command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
    )

    # Continuously read and print output
    # NOTE(review): stderr is PIPEd but only drained after stdout closes; a
    # child writing large amounts to stderr could fill the pipe and block —
    # confirm against long-running permifrost output.
    while True:
        output = process.stdout.readline()
        if output == "" and process.poll() is not None:
            break
        if output:
            console.log(output.strip())

    # Check for errors
    output, errs = process.communicate()
    if process.returncode != 0:
        console.log(f"Error: {errs.strip()}")
        raise subprocess.CalledProcessError(process.returncode, command, errs)
    return output, errs
203
+
204
+
205
def log_dry_run_info():
    """Print a prominent banner indicating that no statements will be executed."""
    separator = "-" * 80
    console.log(separator)
    console.log("[bold]Executing in [yellow]dry run mode[/yellow][/bold]")
    console.log(separator)
@@ -0,0 +1,119 @@
1
+ Metadata-Version: 2.4
2
+ Name: tundri
3
+ Version: 1.3.1
4
+ Summary: Drop, create and alter Snowflake objects and set permissions with Permifrost
5
+ Project-URL: Homepage, https://github.com/Gemma-Analytics/tundri
6
+ Project-URL: Repository, https://github.com/Gemma-Analytics/tundri
7
+ Project-URL: Issues, https://github.com/Gemma-Analytics/tundri/issues
8
+ Author-email: Gemma Analytics <bijan.soltani@gemmaanalytics.com>
9
+ License: MIT
10
+ Keywords: database,ddl,permifrost,snowflake
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.8
16
+ Classifier: Programming Language :: Python :: 3.9
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Requires-Python: >=3.8
21
+ Requires-Dist: gemma-permifrost
22
+ Requires-Dist: python-dotenv
23
+ Requires-Dist: pyyaml
24
+ Requires-Dist: rich
25
+ Requires-Dist: snowflake-connector-python
26
+ Description-Content-Type: text/markdown
27
+
28
+ <div align="center">
29
+ <img src="docs/images/logo.jpg" alt="tundri Logo" width="200">
30
+ </div>
31
+
32
+ **tundri** is a Python package to declaratively create, drop, and alter Snowflake objects and manage their permissions with [Permifrost](https://gitlab.com/gitlab-data/permifrost).
33
+
34
+ ## Motivation
35
+
36
+ Permifrost is great at managing permissions, but it doesn't create or alter objects. As [GitLab's data team handbook](https://handbook.gitlab.com/handbook/enterprise-data/platform/permifrost/) states:
37
+ > Object creation and deletion is not managed by permifrost
38
+
39
+ With only Permifrost, one would have to manually create the objects and then run Permifrost to set the permissions. This is error-prone and time-consuming. That is where tundri comes in.
40
+
41
+ ### In a nutshell
42
+ **tundri** reads the [Permifrost spec file](https://gitlab.com/gitlab-data/permifrost#spec_file) and compares with the current state of the Snowflake account. It then creates, drops, and alters the objects to match. It leverages Permifrost's YAML `meta` tags to set attributes like `default_role` for users and `warehouse_size` for warehouses. Once the objects are created, tundri runs Permifrost to set the permissions.
43
+
44
+ ## Getting started
45
+
46
+ ### Prerequisites
47
+
48
+ - Credentials to a Snowflake account with the `securityadmin` role
49
+ - A Permifrost spec file
50
+
51
+ ### Install
52
+
53
+ ```bash
54
+ pip install tundri
55
+ ```
56
+
57
+ ### Configure
58
+
59
+ #### Permifrost
60
+ Add a valid [Permifrost spec file](https://gitlab.com/gitlab-data/permifrost#spec_file) to your repository. You can use the files in the `examples` folder as reference.
61
+
62
+ #### Snowflake
63
+ Set up your Snowflake connection details in the environment variables listed below.
64
+
65
+ > [!TIP]
66
+ > You can use a `.env` file to store your credentials. Place it in the same folder as the Permifrost spec file.
67
+
68
+ ```bash
69
+ PERMISSION_BOT_ACCOUNT=abc134.west-europe.azure # Your account identifier
70
+ PERMISSION_BOT_USER=PERMIFROST
71
+ PERMISSION_BOT_PASSWORD=...
72
+ PERMISSION_BOT_ROLE=SECURITYADMIN # Permifrost requires it to be `SECURITYADMIN`
73
+ PERMISSION_BOT_DATABASE=PERMIFROST
74
+ PERMISSION_BOT_WAREHOUSE=ADMIN
75
+ ```
76
+
77
+ ### Usage
78
+ The `run` subcommand is going to drop/create objects and run Permifrost.
79
+
80
+ #### Dry run
81
+ ```bash
82
+ tundri run --permifrost_spec_path examples/permifrost.yml --dry
83
+ ```
84
+
85
+ #### Normal run
86
+ ```bash
87
+ tundri run --permifrost_spec_path examples/permifrost.yml
88
+ ```
89
+
90
+ #### Getting help
91
+ ```bash
92
+ tundri --help
93
+ ```
94
+
95
+ ## Development
96
+ ### Local setup
97
+ Install the development dependencies
98
+
99
+ ```bash
100
+ uv sync
101
+ ```
102
+
103
+ ### Run tests
104
+ Run the tests
105
+ ```bash
106
+ uv run pytest -v
107
+ ```
108
+
109
+ ### Formatting
110
+ Run the command below to format the code
111
+ ```bash
112
+ uv run black .
113
+ ```
114
+
115
+ ### Testing locally
116
+ Dry run with the example spec file
117
+ ```bash
118
+ uv run tundri run --dry -p examples/permifrost.yml
119
+ ```
@@ -0,0 +1,12 @@
1
+ tundri/__init__.py,sha256=LG-zblOfMP6hVPKsBg0_Vu7Np90aDwZoQtZkamDOsus,46
2
+ tundri/cli.py,sha256=sd29AnbTcUU5vDnYWid5q8yTTiiJfjo31YRXGRfrUgw,3003
3
+ tundri/constants.py,sha256=H4CKhnJpfxQFIalTow07LElNjREmOaV2C2UuxXgrkAo,1200
4
+ tundri/core.py,sha256=4bNENkEnqehTykF2TqmJNyLEg2EjXauUJq1A6CQLHzg,9448
5
+ tundri/inspector.py,sha256=zhDnXxFeKLCZR06zKHkBEAxyB4sO8Do0ugdu-PN-VaM,3210
6
+ tundri/objects.py,sha256=6nXQk-JgeDlC-ZTSwNtJbxuQ-MvFWyKnirACF1ZC-_M,2230
7
+ tundri/parser.py,sha256=KBxKo-kOMswdYY5QoImcl14vNL6uOg-pe9Q3vUfDX8Y,4767
8
+ tundri/utils.py,sha256=M7RpcdMD3rhHWnO_GTNBd0jFwJmhos0f2FRXHTINg7I,7235
9
+ tundri-1.3.1.dist-info/METADATA,sha256=zDTIBCpyQlXRotAQZXmcp6lHDGl3ToYFOOf0dUimJXE,3853
10
+ tundri-1.3.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
11
+ tundri-1.3.1.dist-info/entry_points.txt,sha256=OyOLF3YkcU4ah14hFwSxSJbhbwMnBLDVE8ymaPbfoYI,43
12
+ tundri-1.3.1.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.27.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ tundri = tundri.cli:main