snowglobe-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. snowglobe/__init__.py +6 -0
  2. snowglobe/__main__.py +3 -0
  3. snowglobe/cli/__init__.py +0 -0
  4. snowglobe/cli/access.py +197 -0
  5. snowglobe/cli/app.py +148 -0
  6. snowglobe/cli/context.py +48 -0
  7. snowglobe/cli/cost.py +291 -0
  8. snowglobe/cli/debug.py +265 -0
  9. snowglobe/cli/diff.py +34 -0
  10. snowglobe/cli/optimizer.py +91 -0
  11. snowglobe/cli/prompts.py +161 -0
  12. snowglobe/cli/report.py +91 -0
  13. snowglobe/cli/shell.py +1437 -0
  14. snowglobe/cli/shell_completer.py +128 -0
  15. snowglobe/collectors/access.py +882 -0
  16. snowglobe/collectors/query_history.py +46 -0
  17. snowglobe/collectors/query_profile.py +101 -0
  18. snowglobe/config/loader.py +42 -0
  19. snowglobe/core/access_service.py +721 -0
  20. snowglobe/core/cost_service.py +929 -0
  21. snowglobe/core/optimizer.py +92 -0
  22. snowglobe/core/query_service.py +48 -0
  23. snowglobe/core/report_service.py +110 -0
  24. snowglobe/core/risk_service.py +358 -0
  25. snowglobe/engines/access/__init__.py +0 -0
  26. snowglobe/engines/access/explainer.py +113 -0
  27. snowglobe/engines/access/resolver.py +199 -0
  28. snowglobe/engines/ai/cortex_optimizer.py +69 -0
  29. snowglobe/engines/optimizer/query_optimizer.py +326 -0
  30. snowglobe/graphs/__init__.py +0 -0
  31. snowglobe/graphs/role_graph.py +140 -0
  32. snowglobe/graphs/user_graph.py +64 -0
  33. snowglobe/models/__init__.py +0 -0
  34. snowglobe/models/access.py +65 -0
  35. snowglobe/models/access_path.py +15 -0
  36. snowglobe/models/object_ref.py +11 -0
  37. snowglobe/models/object_type.py +50 -0
  38. snowglobe/models/optimizer.py +15 -0
  39. snowglobe/models/privilege.py +78 -0
  40. snowglobe/models/query.py +59 -0
  41. snowglobe/output/__init__.py +0 -0
  42. snowglobe/output/cli.py +413 -0
  43. snowglobe/queries/__init__.py +0 -0
  44. snowglobe/queries/query_history.py +37 -0
  45. snowglobe/snowflake/connection.py +75 -0
  46. snowglobe/state/db.py +559 -0
  47. snowglobe/state/state.py +60 -0
  48. snowglobe/templates/report.md.j2 +55 -0
  49. snowglobe/tests/access_tests.py +5 -0
  50. snowglobe/tui/__init__.py +1 -0
  51. snowglobe/tui/__main__.py +3 -0
  52. snowglobe/tui/app.py +299 -0
  53. snowglobe/tui/screens/__init__.py +0 -0
  54. snowglobe/tui/screens/access.py +627 -0
  55. snowglobe/tui/screens/cost.py +831 -0
  56. snowglobe/tui/screens/home.py +222 -0
  57. snowglobe/tui/screens/refresh.py +222 -0
  58. snowglobe/tui/screens/reports.py +252 -0
  59. snowglobe/tui/screens/risk.py +417 -0
  60. snowglobe/tui/screens/tune.py +254 -0
  61. snowglobe/tui/widgets/__init__.py +0 -0
  62. snowglobe/tui/widgets/access_paths.py +63 -0
  63. snowglobe/tui/widgets/cache_badge.py +28 -0
  64. snowglobe/tui/widgets/header.py +21 -0
  65. snowglobe/tui/widgets/nav.py +32 -0
  66. snowglobe_cli-0.1.0.dist-info/METADATA +368 -0
  67. snowglobe_cli-0.1.0.dist-info/RECORD +71 -0
  68. snowglobe_cli-0.1.0.dist-info/WHEEL +5 -0
  69. snowglobe_cli-0.1.0.dist-info/entry_points.txt +2 -0
  70. snowglobe_cli-0.1.0.dist-info/licenses/LICENSE +202 -0
  71. snowglobe_cli-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,721 @@
1
+ import typer
2
+ from collections import defaultdict
3
+ from typing import Callable, Optional, Protocol
4
+ from snowglobe.state.db import StateDB
5
+ from snowglobe.collectors.access import AccessCollector
6
+ from snowglobe.graphs.role_graph import RoleGraph
7
+ from snowglobe.graphs.user_graph import UserGraph
8
+ from snowglobe.models.access import AccessGrant
9
+ from snowglobe.models.privilege import Privilege
10
+ from snowglobe.models.object_ref import ObjectRef
11
+ from snowglobe.models.object_type import ObjectType
12
+ from snowglobe.engines.access.explainer import AccessExplainer
13
+ from snowglobe.engines.access.resolver import AccessResolver
14
+
15
+ # Object types not tracked in GRANTS_TO_ROLES — require SHOW GRANTS ON fallback
16
+ _SHOW_GRANT_TYPES = {"STREAMLIT", "NOTEBOOK", "DYNAMIC TABLE", "ALERT", "TAG", "SECRET"}
17
+
18
+
19
class RefreshProgress(Protocol):
    """
    Structural interface for anything that receives refresh / state-load output.

    One method per output level:
        start   — top-level operation banner (e.g. 'Full refresh...')
        info    — routine progress / counts (e.g. ' Users: 146')
        warning — soft warning (stale cache, missing state)
    """

    def start(self, msg: str) -> None:
        """Announce the beginning of a top-level operation."""

    def info(self, msg: str) -> None:
        """Report a routine progress line."""

    def warning(self, msg: str) -> None:
        """Report a soft warning."""
31
+
32
+
33
class TyperRefreshProgress:
    """Stock progress reporter: prints through typer, matching the current CLI output."""

    def start(self, msg: str) -> None:
        # Operation banners are rendered in cyan.
        banner_colour = typer.colors.CYAN
        typer.secho(msg, fg=banner_colour)

    def info(self, msg: str) -> None:
        # Routine lines are printed uncoloured.
        typer.echo(msg)

    def warning(self, msg: str) -> None:
        # Soft warnings are rendered in yellow.
        warning_colour = typer.colors.YELLOW
        typer.secho(msg, fg=warning_colour)
44
+
45
+
46
class CallableRefreshProgress:
    """
    Reporter for the TUI: forwards every line to one `(level, msg)` callback.

    Example:
        progress = CallableRefreshProgress(lambda level, msg: log.write(level, msg))
        access_service.refresh_state(progress=progress)
    """

    def __init__(self, write: Callable[[str, str], None]):
        # The callback receives the level name first, then the message text.
        self._write = write

    def start(self, msg: str) -> None:
        sink = self._write
        sink("start", msg)

    def info(self, msg: str) -> None:
        sink = self._write
        sink("info", msg)

    def warning(self, msg: str) -> None:
        sink = self._write
        sink("warning", msg)
65
+
66
+
67
def _parse_object_type(obj_type_str: str) -> ObjectType:
    """Convert a raw type string to an ObjectType.

    Returns ObjectType.UNKNOWN when the ObjectType constructor rejects the
    string with ValueError.
    """
    try:
        parsed = ObjectType(obj_type_str)
    except ValueError:
        parsed = ObjectType.UNKNOWN
    return parsed
72
+
73
+
74
+ def _grant_dicts_to_objects(grant_dicts: list[dict]) -> list[AccessGrant]:
75
+ """Convert raw SQLite grant dicts to AccessGrant objects."""
76
+ grants = []
77
+ for row in grant_dicts:
78
+ grants.append(AccessGrant(
79
+ role=row["grantee"],
80
+ privilege=row["privilege"],
81
+ object=ObjectRef(
82
+ object_type=_parse_object_type(row["granted_on"]),
83
+ name=row["fqn"],
84
+ ),
85
+ granted_on=row["granted_on"],
86
+ granted_by=row.get("granted_by", ""),
87
+ inherited=False,
88
+ source_role=None,
89
+ role_type="DATABASE" if row["grantee"].startswith("DATABASE_ROLE::") else "ACCOUNT",
90
+ ))
91
+ return grants
92
+
93
+
94
class AccessService:
    """
    Access-inspection service backed by a local StateDB cache of Snowflake grants.

    Holds the CLI context and profile, refreshes the cache from Snowflake
    (full or incremental), loads the role / user graphs into memory, and
    answers access questions: forward and reverse lookups, CREATE checks,
    drift, unused privileges and a per-user report.
    """

    def __init__(self, context):
        """Store the context and load its profile."""
        self.context = context
        self.load_profile()

    def load_profile(self):
        """Ask the context to load its profile and keep a reference to it."""
        self.context.load_profile()
        self.profile = self.context.profile

    def get_profile(self):
        """Return the profile captured by `load_profile`."""
        return self.profile

    def get_graphs(self):
        """Open the state DB, load state, and return (user_graph, role_graph, object_index)."""
        self.setup_state()
        self.load_state()
        return self.user_graph, self.role_graph, self.object_index

    def setup_state(self):
        """Create a StateDB handle and store it on `self.db`."""
        self.db = StateDB()

    def _ensure_db(self):
        """Create the StateDB handle if no caller has done so yet."""
        if getattr(self, "db", None) is None:
            self.setup_state()

    @staticmethod
    def _edge_pairs(children_by_parent) -> list:
        """Flatten a {parent: iterable-of-children} mapping into (parent, child) tuples."""
        return [
            (parent, child)
            for parent, children in children_by_parent.items()
            for child in children
        ]

    def refresh_state(self, full: bool = False, progress: Optional[RefreshProgress] = None):
        """
        Refresh state from Snowflake.

        If full=True or no previous state exists, does a complete refresh.
        Otherwise, does an incremental refresh using the last refresh timestamp.

        Progress is reported through `progress` (defaults to typer-coloured stdout).

        Raises:
            RuntimeError: if the probe query against SNOWFLAKE.ACCOUNT_USAGE fails.
        """
        p = progress or TyperRefreshProgress()
        # Callers may invoke refresh_state directly, without setup_state().
        self._ensure_db()
        sf = self.context.connect()

        # Probe ACCOUNT_USAGE first so a missing privilege yields an actionable error.
        try:
            with sf:
                sf.query("SELECT 1 FROM SNOWFLAKE.ACCOUNT_USAGE.GRANTS_TO_ROLES LIMIT 1")
        except Exception as e:
            raise RuntimeError(
                "Cannot access SNOWFLAKE.ACCOUNT_USAGE. "
                "Grant access with:\n"
                " GRANT IMPORTED PRIVILEGES ON DATABASE SNOWFLAKE TO ROLE <your_role>;"
            ) from e

        collector = AccessCollector(sf)

        # Incremental refresh needs both a previous timestamp and existing state.
        last_refresh = self.db.get_refreshed_at()
        if full or not last_refresh or not self.db.has_state():
            p.start("Full refresh...")
            self._full_refresh(collector, p)
        else:
            p.start(f"Incremental refresh (since {last_refresh[:19]})...")
            self._incremental_refresh(collector, last_refresh, p)

    def _full_refresh(self, collector, progress: RefreshProgress):
        """Complete refresh — fetch everything into SQLite and keep the graphs in memory."""
        # 1. User roles
        user_graph = collector.collect_user_roles()
        self.db.save_user_roles(user_graph.to_dict())
        self.user_graph = user_graph
        progress.info(f" Users: {len(user_graph.assigned_roles)}")

        # 2. Role hierarchy, stored as flat (parent, child) edges
        role_graph = collector.collect_role_graph()
        self.db.save_role_edges(self._edge_pairs(role_graph.parents))
        self.role_graph = role_graph
        progress.info(f" Roles: {len(role_graph.parents)}")

        # 3. All grants, fetched in bulk
        progress.info(" Grants: fetching...")
        grant_rows = collector.collect_all_grants_bulk()
        self.db.save_grants(grant_rows)
        progress.info(f" Grants: {len(grant_rows)}")

        # 4. Extra objects (types not present in GRANTS_TO_ROLES)
        progress.info(" Extra objects: fetching...")
        extra_objects = collector.collect_extra_objects()
        self.db.save_extra_objects(extra_objects)
        extra_count = sum(len(v) for v in extra_objects.values())
        progress.info(f" Extra objects: {extra_count} FQNs")

        # 5. Update timestamp
        self.db.set_refreshed_at()

        # 6. Load object index from SQLite
        self.object_index = self.db.query_object_index()
        total_objects = sum(len(v) for v in self.object_index.values())
        progress.info(f" Object index: {total_objects} FQNs (derived from grants)")

    def _incremental_refresh(self, collector, since: str, progress: RefreshProgress):
        """
        Incremental refresh — only fetch changes since `since`, apply them to
        SQLite, then force-reload the in-memory graphs.
        """
        # 1. User roles — incremental
        user_changes = collector.collect_user_roles_incremental(since)
        if user_changes["added"] or user_changes["removed"]:
            self.db.upsert_user_roles_incremental(
                added=user_changes["added"],
                removed=user_changes["removed"],
            )
            progress.info(f" Users: +{len(user_changes['added'])} / -{len(user_changes['removed'])} changes")
        else:
            progress.info(" Users: no changes")

        # 2. Role graph — incremental
        role_changes = collector.collect_role_graph_incremental(since)
        if role_changes["added"] or role_changes["removed"]:
            self.db.upsert_role_edges_incremental(
                added=self._edge_pairs(role_changes["added"]),
                removed=self._edge_pairs(role_changes["removed"]),
            )
            # NOTE: these counts are per parent key, not per edge.
            progress.info(f" Roles: +{len(role_changes['added'])} / -{len(role_changes['removed'])} changes")
        else:
            progress.info(" Roles: no changes")

        # 3. All grants — incremental
        grant_changes = collector.collect_all_grants_incremental(since)
        if grant_changes["upsert"] or grant_changes["delete"]:
            self.db.upsert_grants_incremental(
                upserts=grant_changes["upsert"],
                deletes=grant_changes["delete"],
            )
            progress.info(f" Grants: +{len(grant_changes['upsert'])} / -{len(grant_changes['delete'])} changes")
        else:
            progress.info(" Grants: no changes")

        # 4. Update timestamp
        self.db.set_refreshed_at()

        # 5. Reload graphs into memory after the incremental upserts
        self.load_state(progress=progress, force=True)

    def load_state(self, progress: Optional[RefreshProgress] = None, force: bool = False):
        """Load role graph and user graph into memory from SQLite.

        Idempotent on the instance — subsequent calls are no-ops unless `force=True`
        or graphs aren't loaded. This lets a long-lived caller (the TUI) hold one
        AccessService and have `inspect_access` re-call `load_state` internally
        without re-paying the load cost.

        When the DB reports no state, a refresh from Snowflake is triggered instead.
        """
        if not force and getattr(self, "role_graph", None) is not None:
            return
        p = progress or TyperRefreshProgress()
        self._ensure_db()

        if not self.db.has_state():
            p.warning("No cached state found. Fetching from Snowflake...")
            self.refresh_state(progress=p)
            return

        # Check staleness
        self._check_staleness(p)

        # Load role graph
        self.role_graph = RoleGraph().from_dict(self.db.load_role_graph_data())

        # Load user graph; the profile is passed through as keyword arguments
        self.user_graph = UserGraph(self.db.load_user_roles_data(), **self.context.profile)

        # Object index (derived from grants table)
        self.object_index = self.db.query_object_index()

    def _check_staleness(self, progress: RefreshProgress):
        """Warn through `progress` if cached state is older than 24 hours.

        Does nothing when there is no refresh timestamp or it cannot be parsed.
        """
        from datetime import datetime, timezone

        refreshed_at = self.db.get_refreshed_at()
        if not refreshed_at:
            return

        try:
            refreshed = datetime.fromisoformat(refreshed_at)
        except (ValueError, TypeError):
            # Best effort: an unparseable timestamp must not block loading.
            return

        if refreshed.tzinfo is None:
            # A naive value cannot be subtracted from an aware "now".
            # NOTE(review): assumes naive timestamps are UTC, as "now" is — confirm against StateDB.
            refreshed = refreshed.replace(tzinfo=timezone.utc)

        hours = (datetime.now(timezone.utc) - refreshed).total_seconds() / 3600
        if hours > 24:
            # More than 24 hours always means at least one whole day.
            days = int(hours // 24)
            progress.warning(f"State is {days} day(s) old. Run 'refresh' to update.")

    def build_resolver(self, grants: list[AccessGrant]):
        """Create an AccessResolver over the loaded graphs and `grants`; store it on `self.resolver`."""
        self.resolver = AccessResolver(
            user_graph=self.user_graph,
            role_graph=self.role_graph,
            grants=grants,
        )

    def inspect_access(
        self,
        username: Optional[str],
        role: Optional[str],
        object_type: Optional[str],
        object_name: Optional[str],
        privilege: Optional[str],
        ignore_excluded_roles: bool,
        refresh_state: bool,
    ):
        """
        Run access inspection with fully resolved arguments.

        Exactly one of `username` / `role` must be given.

        Raises:
            ValueError: if both or neither of `username` and `role` are provided.
        """
        # Validate before any refresh or load so bad arguments fail fast.
        if username and not role:
            inspect_type = "user"
        elif role and not username:
            inspect_type = "role"
        else:
            raise ValueError("Must provide either username or role (not both, not neither).")

        if ignore_excluded_roles:
            # NOTE(review): this mutates the shared profile for the life of the instance.
            self.profile['exclude_roles'] = []

        self.setup_state()

        if refresh_state:
            self.refresh_state()

        self.load_state()

        # Fetch grants for the object from SQLite
        object_name_upper = object_name.upper() if object_name else None
        grant_dicts = self.db.query_grants_for_object(object_type, object_name_upper)
        grants = _grant_dicts_to_objects(grant_dicts)

        # Fallback: types not in GRANTS_TO_ROLES (STREAMLIT, NOTEBOOK, etc.)
        if not grants and object_type and object_type.upper() in _SHOW_GRANT_TYPES:
            sf = self.context.connect()
            collector = AccessCollector(sf)
            grants = collector.collect_grants_for_object(object_type, object_name_upper)

        self.build_resolver(grants)

        # The database is the first dotted component of the object name.
        database = object_name_upper.split(".", 1)[0] if object_name_upper else None

        args = {
            "inspect_type": inspect_type,
            "username": username,
            "role": role,
            "object_type": object_type,
            "object_name": object_name_upper,
            "database": database,
            "privilege": privilege,
        }

        query = AccessExplainer(resolver=self.resolver, **args)
        if inspect_type == "user":
            return query.user_access(username=username)
        return query.role_access(role=role)

    def inspect_reverse(
        self,
        object_type: str,
        object_name: str,
        privilege: Optional[str] = None,
    ) -> dict:
        """
        Reverse lookup: who/what can access this object?

        Returns a dict with "object_type", "object_name", "object_exists",
        "privilege_filter" and "privileges". "privileges" maps each privilege
        to its "direct_roles", "inherited_roles" and "users".
        """
        self.setup_state()
        self.load_state()

        # Query grants from SQLite
        object_name_upper = object_name.upper()
        grant_dicts = self.db.query_grants_for_object(object_type, object_name_upper)

        # Fallback: types not in GRANTS_TO_ROLES (STREAMLIT, NOTEBOOK, etc.)
        if not grant_dicts and object_type and object_type.upper() in _SHOW_GRANT_TYPES:
            sf = self.context.connect()
            collector = AccessCollector(sf)
            # Same upper-cased name as the SQLite lookup and inspect_access.
            show_grants = collector.collect_grants_for_object(object_type, object_name_upper)
            # Convert AccessGrant objects to dicts for consistent handling
            grant_dicts = [
                {"grantee": g.role, "privilege": g.privilege, "granted_on": g.granted_on,
                 "name": None, "table_catalog": None, "table_schema": None,
                 "granted_by": g.granted_by, "granted_to": g.role_type, "fqn": g.object.name}
                for g in show_grants
            ]

        if not grant_dicts:
            return {
                "object_type": object_type.upper(),
                "object_name": object_name_upper,
                "object_exists": False,
                "privilege_filter": privilege,
                "privileges": {},
            }

        # Group grants by privilege
        grants_by_privilege = defaultdict(list)
        for g in grant_dicts:
            if privilege and not Privilege.matches(g["privilege"], privilege):
                continue
            grants_by_privilege[g["privilege"]].append(g)

        # A user's effective roles do not depend on the privilege; compute them once.
        user_effective_roles = {}
        if grants_by_privilege:
            for user, assigned_roles in self.user_graph.assigned_roles.items():
                effective = set(assigned_roles)
                for r in assigned_roles:
                    effective |= self.role_graph.all_ancestors(r)
                user_effective_roles[user] = effective

        # For each privilege, find all roles (direct + inherited) and users
        privileges_result = {}
        for priv, priv_grants in sorted(grants_by_privilege.items()):
            direct_roles = {g["grantee"] for g in priv_grants}

            # Add every descendant of the directly granted roles
            all_roles_with_access = set(direct_roles)
            for role_key in direct_roles:
                all_roles_with_access.update(self.role_graph.all_descendants(role_key))

            # Users whose effective roles overlap the roles with access
            users_with_access = []
            for user, effective in user_effective_roles.items():
                via_roles = effective & all_roles_with_access
                if via_roles:
                    users_with_access.append({
                        "user": user,
                        "via_roles": sorted(via_roles),
                    })

            privileges_result[priv] = {
                "direct_roles": sorted(direct_roles),
                "inherited_roles": sorted(all_roles_with_access - direct_roles),
                "users": sorted(users_with_access, key=lambda u: u["user"]),
            }

        return {
            "object_type": object_type.upper(),
            "object_name": object_name_upper,
            "object_exists": True,
            "privilege_filter": privilege,
            "privileges": privileges_result,
        }

    def inspect_create(
        self,
        username: Optional[str],
        role: Optional[str],
        privilege: str,
        scope: Optional[str] = None,
    ) -> dict:
        """
        Check CREATE privileges for a user or role.

        Exactly one of `username` / `role` must be given. The result lists
        account-wide, database-level and schema-level grants, and sets
        "has_access". "access_paths" is included only when `scope` is set and
        a database or schema grant was found.

        Raises:
            ValueError: if both or neither of `username` and `role` are provided.
        """
        # Validate before loading state so bad arguments fail fast.
        if username and not role:
            identity, identity_type = username, "user"
        elif role and not username:
            identity, identity_type = role, "role"
        else:
            raise ValueError("Must provide either username or role (not both, not neither).")

        self.setup_state()
        self.load_state()

        # Resolve effective roles
        if identity_type == "user":
            effective_roles = self.user_graph.effective_roles(username, self.role_graph)
            direct_roles = set(self.user_graph.roles_of(username)[0])
            all_roles = effective_roles | direct_roles
        else:
            all_roles = {role} | self.role_graph.all_ancestors(role)

        # Query CREATE grants from SQLite
        create_rows = self.db.query_create_grants(
            privilege=privilege,
            grantee_keys=all_roles,
            scope=scope,
        )

        # Bucket rows by grant level, de-duplicating roles per database / schema
        account_wide_roles = []
        roles_by_database = defaultdict(set)
        roles_by_schema = defaultdict(set)
        for row in create_rows:
            granted_on = row["granted_on"]
            name = row["name"]
            catalog = row["table_catalog"]
            grantee_key = row["grantee"]

            if granted_on == "ACCOUNT":
                account_wide_roles.append(grantee_key)
            elif granted_on == "DATABASE":
                roles_by_database[name].add(grantee_key)
            elif granted_on == "SCHEMA":
                schema_fqn = f"{catalog}.{name}" if catalog else name
                roles_by_schema[schema_fqn].add(grantee_key)

        def _by_name(roles_by_name):
            # One entry per object, sorted by name, with sorted unique roles.
            return sorted(
                [{"name": k, "via_roles": sorted(v)} for k, v in roles_by_name.items()],
                key=lambda x: x["name"],
            )

        result = {
            "privilege": privilege,
            "identity": identity,
            "identity_type": identity_type,
            "scope": scope,
            "account_wide": bool(account_wide_roles),
            "account_wide_roles": account_wide_roles,
            "databases": _by_name(roles_by_database),
            "schemas": _by_name(roles_by_schema),
        }

        # Role inheritance paths when scoped
        if scope and (result["schemas"] or result["databases"]):
            access_paths = []
            granting_roles = set()
            for s in result["schemas"]:
                granting_roles.update(s["via_roles"])
            for d in result["databases"]:
                granting_roles.update(d["via_roles"])
            if result["account_wide"]:
                granting_roles.update(result["account_wide_roles"])

            # Paths are only computed for role identities.
            source_role = role if identity_type == "role" else None
            if source_role:
                # Sorted so the path order does not depend on set iteration.
                for granting_role in sorted(granting_roles):
                    if granting_role == source_role:
                        access_paths.append([source_role, "(direct grant)"])
                    else:
                        access_paths.extend(
                            self.role_graph.all_paths(source_role, granting_role)
                        )

            result["access_paths"] = access_paths

        # Determine overall access
        result["has_access"] = (
            result["account_wide"]
            or len(result["databases"]) > 0
            or len(result["schemas"]) > 0
        )

        return result

    # --- Drift detection ---

    def detect_drift(self, days: Optional[int] = None) -> dict:
        """
        Detect access changes since last refresh (or last N days).

        Returns structured dict of added/revoked grants, role changes, user
        changes, or {"error": ...} when `days` is not given and no previous
        refresh timestamp exists.
        """
        self.setup_state()

        if days:
            from datetime import datetime, timezone, timedelta
            since = (datetime.now(timezone.utc) - timedelta(days=days)).isoformat()
        else:
            since = self.db.get_refreshed_at()
            if not since:
                return {"error": "No previous refresh found. Run 'refresh' first."}

        sf = self.context.connect()
        collector = AccessCollector(sf)

        # Collect changes
        grant_changes = collector.collect_all_grants_incremental(since)
        role_changes = collector.collect_role_graph_incremental(since)
        user_changes = collector.collect_user_roles_incremental(since)

        return {
            # Truncated to the seconds-resolution prefix of the ISO timestamp.
            "since": since[:19],
            "grants_added": grant_changes.get("upsert", []),
            "grants_revoked": grant_changes.get("delete", []),
            "roles_added": role_changes.get("added", {}),
            "roles_removed": role_changes.get("removed", {}),
            "users_added": user_changes.get("added", {}),
            "users_removed": user_changes.get("removed", {}),
        }

    # --- Unused privilege detection ---

    def detect_unused_privileges(self, days: int = 90, limit: int = 30) -> tuple:
        """
        Find account roles with data grants that have had no query activity.

        Active role names come from Snowflake QUERY_HISTORY for the last `days`
        days; roles with SELECT/INSERT/UPDATE/DELETE grants on table-like
        objects come from the local SQLite `grants` table. Database roles are
        skipped.

        Returns (DataFrame, error_message). error_message is None on success;
        on failure the DataFrame is empty.
        """
        import pandas as pd

        self.setup_state()

        # `days` is interpolated into SQL below, so force it to a plain
        # non-negative integer (a negative value would render as `--`).
        try:
            days = int(days)
            limit = int(limit)
        except (TypeError, ValueError):
            return pd.DataFrame(), "days and limit must be integers"
        if days < 0:
            return pd.DataFrame(), "days must be a non-negative integer"

        # The grants table is local SQLite, but QUERY_HISTORY is on Snowflake:
        # get active roles from Snowflake, then compare locally.
        conn = self.context.connect()
        try:
            with conn:
                rows = conn.query(f"""
                    SELECT DISTINCT ROLE_NAME
                    FROM SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY
                    WHERE START_TIME >= DATEADD(day, -{days}, CURRENT_TIMESTAMP())
                    AND EXECUTION_STATUS = 'SUCCESS'
                    AND QUERY_TYPE IN ('SELECT', 'INSERT', 'UPDATE', 'DELETE', 'MERGE')
                """)
                active_roles = {row["ROLE_NAME"] for row in rows}
        except Exception as e:
            return pd.DataFrame(), f"Could not query activity history: {e}"

        # Get all roles with data grants from local SQLite, largest first
        grant_rows = self.db.conn.execute("""
            SELECT grantee, COUNT(DISTINCT fqn) AS object_count
            FROM grants
            WHERE privilege IN ('SELECT', 'INSERT', 'UPDATE', 'DELETE')
            AND granted_on IN ('TABLE', 'VIEW', 'MATERIALIZED VIEW', 'EXTERNAL TABLE')
            GROUP BY grantee
            ORDER BY object_count DESC
        """).fetchall()

        account_prefix = "ACCOUNT_ROLE::"
        results = []
        for row in grant_rows:
            if len(results) >= limit:
                break
            role_key = row["grantee"]
            if not role_key.startswith(account_prefix):
                continue  # Skip database roles for now
            # Strip only the leading key prefix to get the role name.
            role_name = role_key[len(account_prefix):]
            if role_name in active_roles:
                continue
            results.append({
                "ROLE": role_name,
                "GRANTED_OBJECTS": row["object_count"],
                "DAYS_INACTIVE": f">{days}",
            })

        return pd.DataFrame(results), None

    # --- Full user access report ---

    def inspect_user_report(self, username: str) -> dict:
        """
        Full access report for a user: all effective roles and all reachable grants.

        Grants are summarised per object type; each type lists at most 20
        object names for display, while the counts cover all of them.
        """
        self.setup_state()
        self.load_state()

        # Get effective roles
        direct_roles, excluded_roles = self.user_graph.roles_of(username)
        effective_roles = self.user_graph.effective_roles(username, self.role_graph)

        # Get all grants for effective roles from SQLite
        grant_rows = self.db.query_grants_by_grantees(effective_roles)

        # Group by object type
        by_type: dict[str, list[dict]] = {}
        for g in grant_rows:
            by_type.setdefault(g["granted_on"], []).append(g)

        # Build summary
        summary = {}
        for obj_type, grants in sorted(by_type.items()):
            unique_objects = {g["fqn"] for g in grants}
            privileges = {g["privilege"] for g in grants}
            summary[obj_type] = {
                "object_count": len(unique_objects),
                "privileges": sorted(privileges),
                "objects": sorted(unique_objects)[:20],  # Cap for display
                "total_grants": len(grants),
            }

        return {
            "username": username,
            "direct_roles": sorted(direct_roles),
            "excluded_roles": sorted(excluded_roles),
            "effective_roles": sorted(effective_roles),
            "role_count": len(effective_roles),
            "grant_summary": summary,
            "total_objects": sum(s["object_count"] for s in summary.values()),
            "total_grants": sum(s["total_grants"] for s in summary.values()),
        }