src-auth-perms-sync 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. src_auth_perms_sync/__init__.py +1 -0
  2. src_auth_perms_sync/__main__.py +6 -0
  3. src_auth_perms_sync/cli.py +646 -0
  4. src_auth_perms_sync/orgs/__init__.py +1 -0
  5. src_auth_perms_sync/orgs/command.py +7 -0
  6. src_auth_perms_sync/orgs/queries.py +44 -0
  7. src_auth_perms_sync/orgs/sync.py +1167 -0
  8. src_auth_perms_sync/orgs/types.py +103 -0
  9. src_auth_perms_sync/permissions/__init__.py +1 -0
  10. src_auth_perms_sync/permissions/apply.py +420 -0
  11. src_auth_perms_sync/permissions/command.py +918 -0
  12. src_auth_perms_sync/permissions/full_set.py +880 -0
  13. src_auth_perms_sync/permissions/mapping.py +627 -0
  14. src_auth_perms_sync/permissions/maps.py +291 -0
  15. src_auth_perms_sync/permissions/queries.py +180 -0
  16. src_auth_perms_sync/permissions/restore.py +913 -0
  17. src_auth_perms_sync/permissions/snapshot.py +1502 -0
  18. src_auth_perms_sync/permissions/sourcegraph.py +392 -0
  19. src_auth_perms_sync/permissions/types.py +116 -0
  20. src_auth_perms_sync/permissions/workflow.py +526 -0
  21. src_auth_perms_sync/shared/__init__.py +1 -0
  22. src_auth_perms_sync/shared/backups.py +119 -0
  23. src_auth_perms_sync/shared/id_codec.py +67 -0
  24. src_auth_perms_sync/shared/queries.py +65 -0
  25. src_auth_perms_sync/shared/run_context.py +34 -0
  26. src_auth_perms_sync/shared/saml_groups.py +267 -0
  27. src_auth_perms_sync/shared/site_config.py +366 -0
  28. src_auth_perms_sync/shared/sourcegraph.py +69 -0
  29. src_auth_perms_sync/shared/types.py +69 -0
  30. src_auth_perms_sync-0.2.1.dist-info/METADATA +256 -0
  31. src_auth_perms_sync-0.2.1.dist-info/RECORD +34 -0
  32. src_auth_perms_sync-0.2.1.dist-info/WHEEL +4 -0
  33. src_auth_perms_sync-0.2.1.dist-info/entry_points.txt +2 -0
  34. src_auth_perms_sync-0.2.1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,1502 @@
1
+ """Repo-permission snapshots: capture / diff / file I/O."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import datetime
6
+ import hashlib
7
+ import json
8
+ import logging
9
+ import time
10
+ from collections.abc import Callable, Iterable, Sequence
11
+ from concurrent.futures import FIRST_COMPLETED, ThreadPoolExecutor, as_completed, wait
12
+ from dataclasses import dataclass
13
+ from pathlib import Path
14
+ from typing import Any, Literal, TextIO, TypeAlias, TypedDict, cast
15
+
16
+ import src_py_lib as src
17
+
18
+ from ..shared import id_codec, run_context
19
+ from ..shared import types as shared_types
20
+ from . import sourcegraph as permissions_sourcegraph
21
+ from . import types as permission_types
22
+
23
+ log = logging.getLogger(__name__)
24
+
25
+
26
class RepoSnapshot(TypedDict):
    """Explicit-grant record for one repository inside a full snapshot."""

    # Repository name, or a "<repository id=...>" placeholder when the name
    # could not be hydrated at capture time.
    name: str
    # Usernames that hold an explicit grant on this repository.
    explicit_permissions_users: list[str]
29
+
30
+
31
class SnapshotStats(TypedDict):
    """Summary counters stored in a full snapshot's `stats` field."""

    total_users_scanned: int  # number of users submitted for capture
    users_with_explicit_grants: int  # distinct usernames found in any repo entry
    repos_with_explicit_grants: int  # number of repo entries in the snapshot
    total_grants: int  # total (repo, username) pairs across all repo entries
36
+
37
+
38
class Snapshot(TypedDict):
    """Full repo-permission snapshot: metadata, stats and a per-repo grant map."""

    schema_version: int
    captured_at: str  # ISO-8601 UTC timestamp, second precision
    endpoint: str
    bindID_mode: str  # "USERNAME" or "EMAIL", from the GraphQL enum
    config_file: str | None  # absolute path of the YAML, if known
    config_sha256: str | None  # sha256 of the YAML at capture time
    pending_bindIDs: list[str]
    stats: SnapshotStats
    # Keyed by repository ID; each value lists the users with explicit grants.
    repos: dict[str, RepoSnapshot]
48
+
49
+
50
class SnapshotUser(TypedDict):
    """Compact user record carrying only the fields snapshot capture reads."""

    id: str
    username: str
53
+
54
+
55
# Either a `shared_types.User` or the compact `SnapshotUser` form; the capture
# code in this module only reads the `id` and `username` keys.
SnapshotUserInput: TypeAlias = shared_types.User | SnapshotUser
56
+
57
+
58
def compact_snapshot_users(users: Iterable[shared_types.User]) -> list[SnapshotUser]:
    """Reduce each user record to the `id` / `username` pair used by snapshot capture."""
    compacted: list[SnapshotUser] = []
    for record in users:
        compacted.append({"id": record["id"], "username": record["username"]})
    return compacted
61
+
62
+
63
class UserScopedUserSnapshot(TypedDict):
    """Per-user entry of a user-scoped snapshot."""

    id: str  # the user's ID as supplied by the caller
    # Repositories on which the user holds an explicit grant.
    explicit_repositories: list[permission_types.Repository]
66
+
67
+
68
class UserScopedSnapshotStats(TypedDict):
    """Summary counters stored in a user-scoped snapshot's `stats` field."""

    total_users_scanned: int  # number of user entries in the snapshot
    users_with_explicit_grants: int  # users with a non-empty repository list
    total_grants: int  # total repositories across all users
72
+
73
+
74
class UserScopedSnapshot(TypedDict):
    """Snapshot restricted to a supplied set of users, keyed by username."""

    schema_version: int
    snapshot_kind: Literal["user_scope"]  # distinguishes this from a full snapshot
    captured_at: str
    endpoint: str
    bindID_mode: str
    config_file: str | None
    config_sha256: str | None
    stats: UserScopedSnapshotStats
    users: dict[str, UserScopedUserSnapshot]  # keyed by username
84
+
85
+
86
class SnapshotDiffSide(TypedDict):
    """Snapshot metadata identifying one side (`before` / `after`) of a diff."""

    captured_at: str
    endpoint: str
    bindID_mode: str
    config_file: str | None
    config_sha256: str | None
92
+
93
+
94
class SnapshotDiffPendingBindIDs(TypedDict):
    """Pending bindIDs that differ between the two sides of a diff."""

    added: list[str]
    removed: list[str]
97
+
98
+
99
class SnapshotDiffSummary(TypedDict):
    """Aggregate counters for a repo-permission diff."""

    repos_changed: int
    grants_added: int
    grants_removed: int
    pending_bindIDs_added: int
    pending_bindIDs_removed: int
105
+
106
+
107
class RepositoryPermissionDiffEntry(TypedDict):
    """Per-repository entry of a repo-permission diff."""

    id: int  # presumably the decoded integer repository ID -- confirm against the producer
    name: str
    before_count: int
    after_count: int
    added: list[str]
    removed: list[str]
114
+
115
+
116
class SnapshotDiff(TypedDict):
    """Diff document comparing two full repo-permission snapshots."""

    schema_version: int
    diff_kind: Literal["repo_permissions"]
    before: SnapshotDiffSide
    after: SnapshotDiffSide
    summary: SnapshotDiffSummary
    pending_bindIDs: SnapshotDiffPendingBindIDs
    repos: list[RepositoryPermissionDiffEntry]
124
+
125
+
126
class SnapshotDiffRepository(TypedDict):
    """Repository reference (integer ID plus name) used inside diff entries."""

    id: int
    name: str
129
+
130
+
131
class UserScopedSnapshotDiffSummary(TypedDict):
    """Aggregate counters for a user-scoped permission diff."""

    users_changed: int
    grants_added: int
    grants_removed: int
135
+
136
+
137
class UserScopedSnapshotDiffEntry(TypedDict):
    """Per-user entry of a user-scoped permission diff."""

    username: str
    id: str
    before_count: int
    after_count: int
    added_repositories: list[SnapshotDiffRepository]
    removed_repositories: list[SnapshotDiffRepository]
144
+
145
+
146
class UserScopedSnapshotDiff(TypedDict):
    """Diff document comparing two user-scoped snapshots."""

    schema_version: int
    diff_kind: Literal["user_scoped_permissions"]
    before: SnapshotDiffSide
    after: SnapshotDiffSide
    summary: UserScopedSnapshotDiffSummary
    users: list[UserScopedSnapshotDiffEntry]
153
+
154
+
155
# Schema version written into snapshots; snapshot files carrying any other
# version are rejected on load.
SNAPSHOT_SCHEMA_VERSION: int = 3
# Value of `snapshot_kind` that marks a user-scoped snapshot.
USER_SCOPED_SNAPSHOT_KIND = "user_scope"
# Schema version for diff documents.
SNAPSHOT_DIFF_SCHEMA_VERSION: int = 1
158
+
159
+
160
def capture_explicit_grants(
    client: src.SourcegraphClient,
    users: Iterable[SnapshotUserInput],
    parallelism: int,
    explicit_permissions_batch_size: int,
    total_users: int | None = None,
    worker_pool: ThreadPoolExecutor | None = None,
) -> tuple[dict[str, RepoSnapshot], int]:
    """Build the per-repo inverse index of explicit-API grants.

    Fetches `user.permissionsInfo.repositories(source: API)` for batches of
    users in parallel via a thread pool, then inverts to `repo_id → RepoSnapshot`.

    Accepts any `Iterable[User]` — including a streaming generator from
    `list_users_streaming`. When passed a streaming source, this function
    submits batched UserExplicitRepos calls **while** iterating, so the
    submission loop blocking on the next ListUsers page overlaps with
    workers consuming previously-submitted UserExplicitRepos batches. At
    scale this overlaps the entire ListUsers pagination time with capture
    work, removing it from the critical path.

    `total_users`, when supplied, enables percentage + ETA in the
    progress log lines. Callers that have already paid for `count_users()`
    (e.g. `cmd_set` / `cmd_restore` in their --apply branches) should pass
    it through; otherwise progress reports just show running counts and
    rate. Reports fire at every ~10% of `total_users` (or every 1000
    completed when total is unknown).

    Sourcegraph only supports READ repository permissions, so snapshots
    store only the usernames that have explicit repository grants.

    Fetch failures do not abort the capture: a failed batch falls back to
    one query per user, and users whose fetch still fails are recorded with
    no grants and counted in the `per_user_failures` event field.

    Returns `(repos, user_count)` so callers (e.g. `build_snapshot`)
    that need the user-count statistic don't have to materialize the
    iterator twice or measure it themselves.
    """
    # Invert directly as each per-user fetch completes. Store only repo IDs
    # first, then hydrate each unique repo name once after all users complete.
    usernames_by_repository_id: dict[str, list[str]] = {}

    def _fetch(
        batch_users: list[SnapshotUserInput],
    ) -> tuple[dict[str, list[str]], int]:
        # High-frequency (one per user-batch):
        # - log the whole event (start + end) at DEBUG; failures still
        #   get bumped to ERROR by the event() helper
        # - drop the per-event `status="ok"` / `error_type=null` noise on
        #   successes (failures still carry both fields)
        # - omit user IDs since usernames are far more readable
        with src.event(
            "user_explicit_repos_batch_fetch",
            level="DEBUG",
            omit_success_status=True,
            user_count=len(batch_users),
        ) as fetch_event:
            try:
                repository_ids_by_user_id = permissions_sourcegraph.list_users_explicit_repo_ids(
                    client,
                    [user["id"] for user in batch_users],
                    batch_size=explicit_permissions_batch_size,
                )
                failures = 0
            except Exception as exception:
                log.warning(
                    "Failed to batch-fetch explicit grants for %d user(s): %s. "
                    "Falling back to one query per user.",
                    len(batch_users),
                    exception,
                )
                repository_ids_by_user_id, failures = _fetch_one_user_at_a_time(batch_users)
            # Re-key by username; users missing from the response get no grants.
            repository_ids_by_username = {
                user["username"]: repository_ids_by_user_id.get(user["id"], [])
                for user in batch_users
            }
            fetch_event["repo_count"] = sum(
                len(repository_ids) for repository_ids in repository_ids_by_username.values()
            )
            fetch_event["per_user_failures"] = failures
            return repository_ids_by_username, failures

    def _fetch_one_user_at_a_time(
        batch_users: list[SnapshotUserInput],
    ) -> tuple[dict[str, list[str]], int]:
        # Fallback path: one query per user. A user whose query fails is
        # recorded with an empty grant list and counted as a failure.
        repository_ids_by_user_id: dict[str, list[str]] = {}
        failures = 0
        for user in batch_users:
            try:
                repository_ids_by_user_id[user["id"]] = (
                    permissions_sourcegraph.list_user_explicit_repo_ids(
                        client,
                        user["id"],
                    )
                )
            except Exception as exception:
                failures += 1
                log.warning(
                    "Failed to fetch explicit grants for user=%s: %s",
                    user["username"],
                    exception,
                )
                repository_ids_by_user_id[user["id"]] = []
        return repository_ids_by_user_id, failures

    with src.event(
        "capture_explicit_grants",
        total_users=total_users,
        explicit_permissions_batch_size=explicit_permissions_batch_size,
    ) as capture_event:
        capture_failures = 0
        # In-flight futures mapped to the batch each one was submitted with.
        futures: dict[Any, list[SnapshotUserInput]] = {}
        submitted_user_count = 0
        # Cap on in-flight batches; the submit loop drains completed work
        # once this many are pending.
        max_pending_batches = max(1, parallelism * 2)

        def _submit_batch(
            executor: ThreadPoolExecutor,
            batch_users: list[SnapshotUserInput],
        ) -> None:
            nonlocal submitted_user_count
            if not batch_users:
                return
            # Copy so the submitted batch is independent of the caller's list.
            submitted_batch = list(batch_users)
            submitted_user_count += len(submitted_batch)
            future = src.submit_with_log_context(executor, _fetch, submitted_batch)
            futures[future] = submitted_batch

        # Progress reporting: every 10% when total is known (max 10
        # lines), every 1000 otherwise. Avoids drowning the operator on
        # tiny instances and gives steady feedback on large ones.
        progress_step = max(1, total_users // 10) if total_users else 1000
        # Start the timer BEFORE submission. The submit-while-iterating
        # loop blocks on ListUsers pagination, but workers process
        # already-submitted tasks during those blocks — so by the time
        # the submit loop finishes, many futures may already be done.
        # Anchoring `progress_started` here means the first progress
        # line shows real wall-clock work time, not zero.
        progress_started = time.perf_counter()
        completed = 0
        next_progress_report = progress_step
        all_users_submitted = False

        def _record_completed_futures(done_futures: Iterable[Any]) -> None:
            nonlocal capture_failures, completed, next_progress_report
            for future in done_futures:
                submitted_batch = futures.pop(future)
                completed += len(submitted_batch)
                try:
                    repository_ids_by_username, failures = future.result()
                    capture_failures += failures
                    for username, repository_ids in repository_ids_by_username.items():
                        for repository_id in repository_ids:
                            usernames_by_repository_id.setdefault(
                                repository_id,
                                [],
                            ).append(username)
                except Exception as exception:
                    # Don't blow up the whole capture; warn so the operator
                    # can see the users whose grants were treated as empty.
                    capture_failures += len(submitted_batch)
                    log.warning(
                        "Failed to fetch explicit grants for %d user(s): %s",
                        len(submitted_batch),
                        exception,
                    )

                # Report when a progress threshold is crossed, and once more
                # when the final batch completes.
                if completed >= next_progress_report or (
                    all_users_submitted and completed == submitted_user_count
                ):
                    elapsed = time.perf_counter() - progress_started
                    rate = completed / elapsed if elapsed > 0 else 0.0
                    if total_users:
                        remaining = max(total_users - completed, 0)
                        eta_seconds = remaining / rate if rate > 0 else 0.0
                        log.info(
                            "Captured explicit permissions for %d / %d users (%.0f%%) "
                            "in %.0fs (%.0f users/sec, ETA %.0fs).",
                            completed,
                            total_users,
                            100.0 * completed / total_users,
                            elapsed,
                            rate,
                            eta_seconds,
                        )
                    else:
                        log.info(
                            "Captured explicit permissions for %d users in %.0fs (%.0f users/sec).",
                            completed,
                            elapsed,
                            rate,
                        )
                    # Advance past every threshold already crossed so one
                    # large batch does not trigger several reports.
                    while next_progress_report <= completed:
                        next_progress_report += progress_step

        # Submit-while-iterating. Iterating `users` may block on each
        # ListUsers page when a streaming iterator is passed; during those
        # blocks, workers continue processing already-submitted tasks.
        with run_context.thread_pool(parallelism, worker_pool) as executor:
            batch_users: list[SnapshotUserInput] = []
            for user in users:
                batch_users.append(user)
                if len(batch_users) >= explicit_permissions_batch_size:
                    _submit_batch(executor, batch_users)
                    batch_users = []
                if len(futures) >= max_pending_batches:
                    done_futures, _ = wait(futures, return_when=FIRST_COMPLETED)
                    _record_completed_futures(done_futures)
            # Flush the final partial batch (no-op when empty).
            _submit_batch(executor, batch_users)
            all_users_submitted = True

            # Drain whatever is still in flight.
            while futures:
                done_futures, _ = wait(futures, return_when=FIRST_COMPLETED)
                _record_completed_futures(done_futures)
        capture_event["user_count"] = submitted_user_count
        capture_event["per_user_failures"] = capture_failures
        capture_event["max_pending_batches"] = max_pending_batches

    # Stable sort: users alphabetical within each repo.
    for usernames in usernames_by_repository_id.values():
        usernames.sort()

    with src.event(
        "hydrate_explicit_repository_names",
        repository_count=len(usernames_by_repository_id),
    ) as hydrate_event:
        repositories_by_id = permissions_sourcegraph.list_repositories_by_ids(
            client,
            usernames_by_repository_id.keys(),
        )
        hydrate_event["hydrated_repository_count"] = len(repositories_by_id)

    repos_out: dict[str, RepoSnapshot] = {}
    for repository_id, usernames in usernames_by_repository_id.items():
        repos_out[repository_id] = {
            "name": _snapshot_repository_name(repositories_by_id, repository_id),
            "explicit_permissions_users": usernames,
        }

    return repos_out, submitted_user_count
396
+
397
+
398
+ def _snapshot_repository_name(
399
+ repositories_by_id: dict[str, permission_types.Repository],
400
+ repository_id: str,
401
+ ) -> str:
402
+ repository = repositories_by_id.get(repository_id)
403
+ if repository is not None:
404
+ return repository["name"]
405
+ try:
406
+ decoded_repository_id = id_codec.decode_repository_id(repository_id)
407
+ return f"<repository id={decoded_repository_id}>"
408
+ except ValueError:
409
+ return f"<repository id={repository_id}>"
410
+
411
+
412
def build_snapshot(
    client: src.SourcegraphClient,
    users: Iterable[SnapshotUserInput],
    parallelism: int,
    bind_id_mode: str,
    config_path: Path | None = None,
    *,
    total_users: int | None = None,
    explicit_permissions_batch_size: int,
    worker_pool: ThreadPoolExecutor | None = None,
) -> Snapshot:
    """Assemble a full Snapshot: explicit grants, pending bindIDs and metadata.

    `users` may be a streaming iterator; it is consumed by
    `capture_explicit_grants`, which submits batched work as users arrive.

    `total_users`, when known, is forwarded so the capture progress lines can
    show percentage and ETA.
    """
    with src.event("build_snapshot", bind_id_mode=bind_id_mode) as build_event:
        repos, user_count = capture_explicit_grants(
            client,
            users,
            parallelism,
            explicit_permissions_batch_size,
            total_users=total_users,
            worker_pool=worker_pool,
        )
        pending = permissions_sourcegraph.list_pending_bind_ids(client)

        # Fingerprint the config file only when a path was given and it exists.
        config_sha: str | None = None
        if config_path is not None and config_path.exists():
            config_sha = hashlib.sha256(config_path.read_bytes()).hexdigest()

        granted_usernames: set[str] = {
            username for repo in repos.values() for username in repo["explicit_permissions_users"]
        }
        total_grants = sum(len(repo["explicit_permissions_users"]) for repo in repos.values())
        users_with_grants = len(granted_usernames)
        repo_count = len(repos)

        build_event["user_count"] = user_count
        build_event["repos_with_explicit_grants"] = repo_count
        build_event["users_with_explicit_grants"] = users_with_grants
        build_event["total_grants"] = total_grants
        build_event["pending_bindIDs_count"] = len(pending)

        return {
            "schema_version": SNAPSHOT_SCHEMA_VERSION,
            "captured_at": datetime.datetime.now(datetime.UTC).isoformat(timespec="seconds"),
            "endpoint": client.endpoint,
            "bindID_mode": bind_id_mode,
            "config_file": str(config_path.resolve()) if config_path else None,
            "config_sha256": config_sha,
            "pending_bindIDs": sorted(pending),
            "stats": {
                "total_users_scanned": user_count,
                "users_with_explicit_grants": users_with_grants,
                "repos_with_explicit_grants": repo_count,
                "total_grants": total_grants,
            },
            # Ordered by repo ID so the written file is stable across runs.
            "repos": dict(sorted(repos.items())),
        }
476
+
477
+
478
def capture_user_scoped_explicit_grants(
    client: src.SourcegraphClient,
    users: Iterable[SnapshotUser],
    parallelism: int,
    worker_pool: ThreadPoolExecutor | None = None,
) -> dict[str, UserScopedUserSnapshot]:
    """Fetch explicit API grants for exactly the supplied users.

    One fetch is submitted per user. A user whose fetch fails is logged and
    recorded with an empty repository list. The result is keyed by username
    and ordered by username.
    """

    def _fetch(user: SnapshotUser) -> tuple[SnapshotUser, list[permission_types.Repository]]:
        with src.event(
            "user_scoped_explicit_repos_fetch",
            level="DEBUG",
            omit_success_status=True,
            username=user["username"],
        ) as fetch_event:
            repos = permissions_sourcegraph.list_user_explicit_repos(client, user["id"])
            fetch_event["repo_count"] = len(repos)
            return user, repos

    snapshots_by_username: dict[str, UserScopedUserSnapshot] = {}
    with src.event("capture_user_scoped_explicit_grants") as capture_event:
        user_by_future: dict[Any, SnapshotUser] = {}
        with run_context.thread_pool(parallelism, worker_pool) as executor:
            for user in users:
                pending = src.submit_with_log_context(executor, _fetch, user)
                user_by_future[pending] = user
            for finished in as_completed(user_by_future):
                submitted_user = user_by_future[finished]
                owner: SnapshotUser
                repositories: list[permission_types.Repository]
                try:
                    owner, repositories = finished.result()
                except Exception as exception:
                    log.warning(
                        "Failed to fetch scoped explicit grants for user=%s: %s",
                        submitted_user["username"],
                        exception,
                    )
                    owner, repositories = submitted_user, []
                ordered_repositories = sorted(repositories, key=lambda repo: repo["name"])
                snapshots_by_username[owner["username"]] = {
                    "id": owner["id"],
                    "explicit_repositories": ordered_repositories,
                }
        capture_event["user_count"] = len(snapshots_by_username)
        capture_event["total_grants"] = sum(
            len(entry["explicit_repositories"]) for entry in snapshots_by_username.values()
        )
        return dict(sorted(snapshots_by_username.items()))
525
+
526
+
527
def build_user_scoped_snapshot(
    client: src.SourcegraphClient,
    users: Iterable[SnapshotUser],
    parallelism: int,
    bind_id_mode: str,
    config_path: Path | None = None,
    worker_pool: ThreadPoolExecutor | None = None,
) -> UserScopedSnapshot:
    """Assemble a user-scoped snapshot covering only the supplied users."""
    with src.event("build_user_scoped_snapshot", bind_id_mode=bind_id_mode) as build_event:
        scoped_users = capture_user_scoped_explicit_grants(
            client,
            users,
            parallelism,
            worker_pool=worker_pool,
        )

        # Fingerprint the config file only when a path was given and it exists.
        config_sha: str | None = None
        if config_path is not None and config_path.exists():
            config_sha = hashlib.sha256(config_path.read_bytes()).hexdigest()

        grant_counts = [len(entry["explicit_repositories"]) for entry in scoped_users.values()]
        total_grants = sum(grant_counts)
        users_with_explicit_grants = sum(1 for count in grant_counts if count)
        scanned = len(scoped_users)

        build_event["user_count"] = scanned
        build_event["users_with_explicit_grants"] = users_with_explicit_grants
        build_event["total_grants"] = total_grants

        return {
            "schema_version": SNAPSHOT_SCHEMA_VERSION,
            "snapshot_kind": USER_SCOPED_SNAPSHOT_KIND,
            "captured_at": datetime.datetime.now(datetime.UTC).isoformat(timespec="seconds"),
            "endpoint": client.endpoint,
            "bindID_mode": bind_id_mode,
            "config_file": str(config_path.resolve()) if config_path else None,
            "config_sha256": config_sha,
            "stats": {
                "total_users_scanned": scanned,
                "users_with_explicit_grants": users_with_explicit_grants,
                "total_grants": total_grants,
            },
            "users": scoped_users,
        }
572
+
573
+
574
+ def _write_pretty_json(path: Path, value: Any) -> int:
575
+ """Write pretty JSON without materializing the encoded string first."""
576
+ with path.open("w", encoding="utf-8") as output:
577
+ json.dump(value, output, indent=2, sort_keys=False)
578
+ output.write("\n")
579
+ return path.stat().st_size
580
+
581
+
582
+ def _write_top_level_json_field(
583
+ output: TextIO,
584
+ name: str,
585
+ value: object,
586
+ *,
587
+ first: bool,
588
+ ) -> None:
589
+ if not first:
590
+ output.write(",\n")
591
+ output.write(f" {json.dumps(name)}: ")
592
+ output.write(json.dumps(value, indent=2).replace("\n", "\n "))
593
+
594
+
595
+ def _write_string_list(output: TextIO, values: Sequence[str], indent: int) -> None:
596
+ if not values:
597
+ output.write("[]")
598
+ return
599
+ output.write("[\n")
600
+ value_indent = " " * (indent + 2)
601
+ for index, value in enumerate(values):
602
+ if index:
603
+ output.write(",\n")
604
+ output.write(value_indent)
605
+ json.dump(value, output)
606
+ output.write("\n" + " " * indent + "]")
607
+
608
+
609
def _write_repo_snapshot_value(output: TextIO, repo: RepoSnapshot, indent: int) -> None:
    """Write one repo entry as a JSON object whose closing brace sits at `indent` spaces."""
    member_prefix = " " * (indent + 2)
    closing = "\n" + " " * indent + "}"

    output.write("{\n")
    output.write(f'{member_prefix}"name": ')
    json.dump(repo["name"], output)
    output.write(",\n")
    output.write(f'{member_prefix}"explicit_permissions_users": ')
    _write_string_list(output, repo["explicit_permissions_users"], indent + 2)
    output.write(closing)
618
+
619
+
620
def _write_repository_value(output: TextIO, repository: permission_types.Repository) -> None:
    """Write a repository as a single-line JSON object, with its ID decoded via `id_codec`."""
    output.write('{"id": ')
    json.dump(id_codec.decode_repository_id(repository["id"]), output)
    output.write(', "name": ')
    json.dump(repository["name"], output)
    output.write("}")
627
+
628
+
629
def _write_repository_list(
    output: TextIO,
    repositories: Sequence[permission_types.Repository],
    indent: int,
) -> None:
    """Write repositories as a JSON array with one single-line object per row.

    Rows are indented `indent + 2` spaces and the closing bracket `indent`
    spaces. An empty sequence is written as `[]`.
    """
    if repositories:
        row_prefix = " " * (indent + 2)
        output.write("[\n")
        for position, repository in enumerate(repositories):
            if position > 0:
                output.write(",\n")
            output.write(row_prefix)
            _write_repository_value(output, repository)
        output.write("\n" + " " * indent + "]")
    else:
        output.write("[]")
645
+
646
+
647
def _write_user_scoped_snapshot_value(
    output: TextIO,
    user_snapshot: UserScopedUserSnapshot,
    indent: int,
) -> None:
    """Write one user entry as a JSON object whose closing brace sits at `indent` spaces."""
    member_prefix = " " * (indent + 2)
    closing = "\n" + " " * indent + "}"

    output.write("{\n")
    output.write(f'{member_prefix}"id": ')
    json.dump(user_snapshot["id"], output)
    output.write(",\n")
    output.write(f'{member_prefix}"explicit_repositories": ')
    _write_repository_list(output, user_snapshot["explicit_repositories"], indent + 2)
    output.write(closing)
660
+
661
+
662
def _write_snapshot_json(
    path: Path,
    snapshot: Snapshot,
    repos: Iterable[tuple[str, RepoSnapshot]],
) -> int:
    """Stream a full snapshot to `path` as pretty-printed JSON.

    Metadata members are taken from `snapshot`; the `"repos"` member is
    streamed from `repos`, so the repo map never has to be copied to decode
    its keys. Each repo ID is decoded with `id_codec.decode_repository_id`
    and written as a string key.

    Indentation follows a 2-space scheme: `"repos"` at 2 spaces, each repo
    key at 4 spaces. That matches the indent passed to
    `_write_repo_snapshot_value`, so an entry's closing brace lines up with
    its key.

    Args:
        path: Destination file; overwritten if it exists.
        snapshot: Source of the metadata members.
        repos: `(repo_id, entry)` pairs, written in iteration order.

    Returns:
        Size of the written file in bytes.
    """
    top_level_indent = 2
    entry_indent = top_level_indent + 2
    with path.open("w", encoding="utf-8") as output:
        output.write("{\n")
        first = True
        fields: tuple[tuple[str, object], ...] = (
            ("schema_version", snapshot["schema_version"]),
            ("captured_at", snapshot["captured_at"]),
            ("endpoint", snapshot["endpoint"]),
            ("bindID_mode", snapshot["bindID_mode"]),
            ("config_file", snapshot["config_file"]),
            ("config_sha256", snapshot["config_sha256"]),
            ("pending_bindIDs", snapshot["pending_bindIDs"]),
            ("stats", snapshot["stats"]),
        )
        for field_name, value in fields:
            _write_top_level_json_field(
                output,
                field_name,
                value,
                first=first,
            )
            first = False

        output.write(",\n" + " " * top_level_indent + '"repos": {')
        wrote_repo = False
        for repo_id, repo in repos:
            if wrote_repo:
                output.write(",")
            # Key at the same indent the entry writer closes its brace at.
            output.write("\n" + " " * entry_indent)
            json.dump(str(id_codec.decode_repository_id(repo_id)), output)
            output.write(": ")
            _write_repo_snapshot_value(output, repo, entry_indent)
            wrote_repo = True
        if wrote_repo:
            output.write("\n" + " " * top_level_indent + "}")
        else:
            # Empty map: close on the same line as the opening brace.
            output.write("}")
        output.write("\n}\n")
    # Stat after the file is closed so buffered data is included.
    return path.stat().st_size
706
+
707
+
708
def write_snapshot_with_repos(
    path: Path,
    snapshot: Snapshot,
    repos: Iterable[tuple[str, RepoSnapshot]],
) -> None:
    """Write a full snapshot to `path`, taking repo entries from `repos`.

    Missing parent directories are created first. The byte count of the
    written file is recorded on the surrounding `disk_io` event.
    """
    target_directory = path.parent
    with src.event(
        "disk_io",
        level="DEBUG",
        op="write",
        path=str(path),
        file_kind="snapshot",
    ) as disk_event:
        target_directory.mkdir(parents=True, exist_ok=True)
        written_bytes = _write_snapshot_json(path, snapshot, repos)
        disk_event["bytes"] = written_bytes
723
+
724
+
725
def write_snapshot(path: Path, snapshot: Snapshot) -> None:
    """Write `snapshot` to `path` as pretty-printed JSON with stable ordering.

    On disk, repo IDs are decoded from their opaque GraphQL Node form
    (`Repository:<int>` base64) to plain integer keys, which are easier to
    grep, diff and read. `read_snapshot` re-encodes them on load, so
    in-memory consumers of `Snapshot` keep seeing opaque IDs.
    """
    repo_entries = snapshot["repos"].items()
    write_snapshot_with_repos(path, snapshot, repo_entries)
735
+
736
+
737
def write_user_scoped_snapshot(path: Path, snapshot: UserScopedSnapshot) -> None:
    """Write a user-scoped snapshot to `path` as pretty-printed JSON.

    Missing parent directories are created first. The byte count of the
    written file is recorded on the surrounding `disk_io` event.

    Indentation follows a 2-space scheme: `"users"` at 2 spaces, each
    username key at 4 spaces. That matches the indent passed to
    `_write_user_scoped_snapshot_value`, so an entry's closing brace lines
    up with its key.

    Args:
        path: Destination file; overwritten if it exists.
        snapshot: The user-scoped snapshot to serialize.
    """
    top_level_indent = 2
    entry_indent = top_level_indent + 2
    with src.event(
        "disk_io",
        level="DEBUG",
        op="write",
        path=str(path),
        file_kind="user_scoped_snapshot",
    ) as disk_event:
        path.parent.mkdir(parents=True, exist_ok=True)
        with path.open("w", encoding="utf-8") as output:
            output.write("{\n")
            first = True
            fields: tuple[tuple[str, object], ...] = (
                ("schema_version", snapshot["schema_version"]),
                ("snapshot_kind", snapshot["snapshot_kind"]),
                ("captured_at", snapshot["captured_at"]),
                ("endpoint", snapshot["endpoint"]),
                ("bindID_mode", snapshot["bindID_mode"]),
                ("config_file", snapshot["config_file"]),
                ("config_sha256", snapshot["config_sha256"]),
                ("stats", snapshot["stats"]),
            )
            for field_name, value in fields:
                _write_top_level_json_field(
                    output,
                    field_name,
                    value,
                    first=first,
                )
                first = False

            output.write(",\n" + " " * top_level_indent + '"users": {')
            wrote_user = False
            for username, user_snapshot in snapshot["users"].items():
                if wrote_user:
                    output.write(",")
                # Key at the same indent the entry writer closes its brace at.
                output.write("\n" + " " * entry_indent)
                json.dump(username, output)
                output.write(": ")
                _write_user_scoped_snapshot_value(output, user_snapshot, entry_indent)
                wrote_user = True
            if wrote_user:
                output.write("\n" + " " * top_level_indent + "}")
            else:
                # Empty map: close on the same line as the opening brace.
                output.write("}")
            output.write("\n}\n")
        # Stat after the file is closed so buffered data is included.
        disk_event["bytes"] = path.stat().st_size
785
+
786
+
787
def _read_snapshot_raw(path: Path, file_kind: str) -> dict[str, Any]:
    """Parse the JSON file at `path` and return it untouched.

    The file size is recorded on a `disk_io` event labelled with
    `file_kind`. No schema validation happens here.
    """
    with src.event(
        "disk_io",
        level="DEBUG",
        op="read",
        path=str(path),
        file_kind=file_kind,
    ) as disk_event:
        disk_event["bytes"] = path.stat().st_size
        with path.open(encoding="utf-8") as snapshot_file:
            parsed = json.load(snapshot_file)
            return cast(dict[str, Any], parsed)
798
+
799
+
800
def _validate_snapshot_schema_version(path: Path, version: object) -> None:
    """Exit with a path-prefixed message unless `version` equals `SNAPSHOT_SCHEMA_VERSION`.

    Raises:
        SystemExit: when the version does not match.
    """
    if version != SNAPSHOT_SCHEMA_VERSION:
        raise SystemExit(
            f"{path}: snapshot schema_version is {version!r}, "
            f"expected {SNAPSHOT_SCHEMA_VERSION}. Refusing to load."
        )
808
+
809
+
810
def _encode_full_snapshot_raw(path: Path, raw: dict[str, Any]) -> Snapshot:
    """Validate a parsed full snapshot and re-encode its repo keys.

    On-disk integer repo keys are converted with
    `id_codec.encode_repository_id`. `raw` is modified in place and returned.

    Raises:
        SystemExit: on a schema-version mismatch, or when the file is a
            user-scoped snapshot.
    """
    _validate_snapshot_schema_version(path, raw.get("schema_version"))
    if raw.get("snapshot_kind") == USER_SCOPED_SNAPSHOT_KIND:
        raise SystemExit(f"{path}: snapshot_kind is 'user_scope', expected full repo snapshot.")

    on_disk_repos = cast(dict[str, RepoSnapshot], raw.get("repos", {}))
    encoded_repos: dict[str, RepoSnapshot] = {}
    for disk_key, entry in on_disk_repos.items():
        encoded_repos[id_codec.encode_repository_id(int(disk_key))] = entry
    raw["repos"] = encoded_repos
    return cast(Snapshot, raw)
819
+
820
+
821
def _encode_user_scoped_snapshot_raw(path: Path, raw: dict[str, Any]) -> UserScopedSnapshot:
    """Validate a parsed user-scoped snapshot and re-encode its repository IDs.

    Each user entry is rebuilt with only `id` and `explicit_repositories`,
    and each repository with only `id` (re-encoded via
    `id_codec.encode_repository_id`) and `name`. `raw` is modified in place
    and returned.

    Raises:
        SystemExit: on a schema-version mismatch, or when `snapshot_kind` is
            not the user-scoped kind.
    """
    _validate_snapshot_schema_version(path, raw.get("schema_version"))
    kind = raw.get("snapshot_kind")
    if kind != USER_SCOPED_SNAPSHOT_KIND:
        raise SystemExit(f"{path}: snapshot_kind is {kind!r}, expected 'user_scope'.")

    on_disk_users = cast(dict[str, dict[str, Any]], raw.get("users", {}))
    encoded_users: dict[str, dict[str, Any]] = {}
    for username, disk_entry in on_disk_users.items():
        user_id = disk_entry["id"]
        disk_repositories = cast(list[dict[str, Any]], disk_entry["explicit_repositories"])
        repositories = []
        for disk_repository in disk_repositories:
            repositories.append(
                {
                    "id": id_codec.encode_repository_id(int(disk_repository["id"])),
                    "name": cast(str, disk_repository["name"]),
                }
            )
        encoded_users[username] = {"id": user_id, "explicit_repositories": repositories}
    raw["users"] = encoded_users
    return cast(UserScopedSnapshot, raw)
842
+
843
+
844
def read_snapshot_file(path: Path) -> Snapshot | UserScopedSnapshot:
    """Load a snapshot of either supported kind, parsing the file only once.

    The ``snapshot_kind`` field of the parsed document selects the encoder:
    the user-scoped one when it equals ``USER_SCOPED_SNAPSHOT_KIND``, the
    full-snapshot one otherwise.
    """
    raw = _read_snapshot_raw(path, "snapshot")
    encode = (
        _encode_user_scoped_snapshot_raw
        if raw.get("snapshot_kind") == USER_SCOPED_SNAPSHOT_KIND
        else _encode_full_snapshot_raw
    )
    return encode(path, raw)
850
+
851
+
852
def read_snapshot(path: Path) -> Snapshot:
    """Load a full snapshot from disk, validating its ``schema_version``.

    Integer repo IDs stored on disk are re-encoded to opaque GraphQL Node IDs
    (`Repository:<int>` base64), so callers get the same shape that
    `build_snapshot` produces in memory.
    """
    raw = _read_snapshot_raw(path, "snapshot")
    return _encode_full_snapshot_raw(path, raw)
860
+
861
+
862
def read_user_scoped_snapshot(path: Path) -> UserScopedSnapshot:
    """Load a user-scoped snapshot from disk and re-encode its repository IDs.

    The read is reported with ``file_kind="user_scoped_snapshot"``.
    """
    raw = _read_snapshot_raw(path, "user_scoped_snapshot")
    return _encode_user_scoped_snapshot_raw(path, raw)
868
+
869
+
870
class RepoDiff(TypedDict):
    """Per-repo permission delta, as produced by ``diff_snapshots``."""

    # Repo display name (the "after" name is preferred when both sides exist).
    name: str
    # Usernames present only on the "after" side.
    added: list[str]
    # Usernames present only on the "before" side.
    removed: list[str]
874
+
875
+
876
@dataclass(frozen=True)
class _SnapshotDiffPlan:
    """Aggregate result of comparing two full snapshots.

    Holds which repos changed plus totals; the per-repo added/removed
    username lists are not stored here.
    """

    # IDs of repos whose explicit-permission user lists differ.
    changed_repo_ids: list[str]
    # Total usernames gained across all changed repos.
    grants_added: int
    # Total usernames lost across all changed repos.
    grants_removed: int
    # Pending bindIDs present only in the "after" snapshot, sorted.
    pending_added: list[str]
    # Pending bindIDs present only in the "before" snapshot, sorted.
    pending_removed: list[str]
883
+
884
+
885
def _sorted_usernames(values: Sequence[str]) -> Sequence[str]:
    """Return ``values`` in ascending order, reusing the input when possible.

    When ``values`` is already non-decreasing the very same object is
    returned (no copy); otherwise a new sorted list is returned.
    """
    for position in range(1, len(values)):
        if values[position] < values[position - 1]:
            # First out-of-order pair found: fall back to a real sort.
            return sorted(values)
    return values
889
+
890
+
891
def _repo_usernames(repo: RepoSnapshot | None) -> Sequence[str]:
    """Return the repo's ``explicit_permissions_users``, or ``()`` for ``None``."""
    return () if repo is None else repo["explicit_permissions_users"]
895
+
896
+
897
def _sorted_username_diff_counts(
    before_usernames: Sequence[str],
    after_usernames: Sequence[str],
) -> tuple[int, int]:
    """Count usernames gained and lost between two username sequences.

    Both inputs are put in ascending order (via ``_sorted_usernames``) and
    walked in lockstep, so no added/removed lists are built.

    Returns:
        ``(added, removed)``: how many entries appear only in
        ``after_usernames`` and only in ``before_usernames`` respectively.
    """
    if before_usernames == after_usernames:
        return 0, 0

    old = _sorted_usernames(before_usernames)
    new = _sorted_usernames(after_usernames)
    old_len, new_len = len(old), len(new)
    old_pos = new_pos = 0
    added = removed = 0
    while old_pos < old_len and new_pos < new_len:
        left, right = old[old_pos], new[new_pos]
        if left < right:
            # Only on the "before" side.
            removed += 1
            old_pos += 1
        elif right < left:
            # Only on the "after" side.
            added += 1
            new_pos += 1
        else:
            old_pos += 1
            new_pos += 1
    # Whatever is left on either side has no counterpart on the other.
    return added + (new_len - new_pos), removed + (old_len - old_pos)
924
+
925
+
926
def _sorted_username_diff_values(
    before_usernames: Sequence[str],
    after_usernames: Sequence[str],
) -> tuple[list[str], list[str]]:
    """List usernames gained and lost between two username sequences.

    Both inputs are put in ascending order (via ``_sorted_usernames``) and
    walked in lockstep.

    Returns:
        ``(added, removed)``: entries found only in ``after_usernames`` and
        only in ``before_usernames`` respectively, each in ascending order.
    """
    if before_usernames == after_usernames:
        return [], []

    old = _sorted_usernames(before_usernames)
    new = _sorted_usernames(after_usernames)
    old_len, new_len = len(old), len(new)
    old_pos = new_pos = 0
    gained: list[str] = []
    lost: list[str] = []
    while old_pos < old_len and new_pos < new_len:
        left, right = old[old_pos], new[new_pos]
        if left < right:
            # Only on the "before" side.
            lost.append(left)
            old_pos += 1
        elif right < left:
            # Only on the "after" side.
            gained.append(right)
            new_pos += 1
        else:
            old_pos += 1
            new_pos += 1
    # Whatever is left on either side has no counterpart on the other.
    lost.extend(old[old_pos:])
    gained.extend(new[new_pos:])
    return gained, lost
953
+
954
+
955
def diff_snapshots(
    before: dict[str, RepoSnapshot],
    after: dict[str, RepoSnapshot],
) -> dict[str, RepoDiff]:
    """Compute per-repo {added, removed} bindID lists.

    A repo present on one side only contributes all of its users to `added`
    (after-only) or `removed` (before-only). Repos whose user lists match on
    both sides are left out of the result.
    """
    changes: dict[str, RepoDiff] = {}
    for repo_id in set(before) | set(after):
        repo_before = before.get(repo_id)
        repo_after = after.get(repo_id)
        added, removed = _sorted_username_diff_values(
            _repo_usernames(repo_before),
            _repo_usernames(repo_after),
        )
        if added or removed:
            # The post-state name wins when the repo exists on both sides.
            display_name = (repo_after or repo_before or {"name": "<unknown>"})["name"]
            changes[repo_id] = {"name": display_name, "added": added, "removed": removed}
    return changes
983
+
984
+
985
def _snapshot_diff_repo_name(
    before: Snapshot,
    after_repo_for_id: Callable[[str], RepoSnapshot | None],
    repo_id: str,
) -> str:
    """Pick the display name for ``repo_id``.

    The "after" entry's name is preferred, then the "before" entry's;
    ``"<unknown>"`` is returned when neither side has a usable entry.
    """
    after_repo = after_repo_for_id(repo_id)
    before_repo = before["repos"].get(repo_id)
    for candidate in (after_repo, before_repo):
        if candidate:
            return candidate["name"]
    return "<unknown>"
993
+
994
+
995
def _plan_snapshot_diff(
    before: Snapshot,
    after: Snapshot,
    repo_ids: Iterable[str],
    after_repo_for_id: Callable[[str], RepoSnapshot | None],
) -> _SnapshotDiffPlan:
    """Work out which repos changed and the aggregate grant/pending deltas.

    Only per-repo counts are computed here; the added/removed username lists
    are not built.

    Args:
        before: Snapshot supplying the "before" repos and pending bindIDs.
        after: Snapshot supplying the "after" pending bindIDs.
        repo_ids: Candidate repo IDs to compare.
        after_repo_for_id: Lookup returning the "after" state of a repo, or
            ``None`` when there is none.

    Returns:
        A plan whose ``changed_repo_ids`` are ordered by display name, with
        the repo ID as tie-breaker, and whose pending bindID lists are sorted.
    """
    changed_repo_ids: list[str] = []
    grants_added = 0
    grants_removed = 0
    for repo_id in repo_ids:
        added_count, removed_count = _sorted_username_diff_counts(
            _repo_usernames(before["repos"].get(repo_id)),
            _repo_usernames(after_repo_for_id(repo_id)),
        )
        if not added_count and not removed_count:
            continue
        changed_repo_ids.append(repo_id)
        grants_added += added_count
        grants_removed += removed_count

    # Callers in this module pass a set union as repo_ids, so the incoming
    # order is not stable between runs. Breaking name ties on the ID keeps the
    # output deterministic when two repo IDs carry the same display name.
    changed_repo_ids.sort(
        key=lambda repo_id: (
            _snapshot_diff_repo_name(before, after_repo_for_id, repo_id),
            repo_id,
        )
    )
    before_pending = set(before["pending_bindIDs"])
    after_pending = set(after["pending_bindIDs"])
    return _SnapshotDiffPlan(
        changed_repo_ids=changed_repo_ids,
        grants_added=grants_added,
        grants_removed=grants_removed,
        pending_added=sorted(after_pending - before_pending),
        pending_removed=sorted(before_pending - after_pending),
    )
1029
+
1030
+
1031
def _snapshot_diff_entry(
    before: Snapshot,
    after_repo_for_id: Callable[[str], RepoSnapshot | None],
    repo_id: str,
) -> RepositoryPermissionDiffEntry:
    """Build the full diff record for a single repo.

    The record carries the decoded repo ID, the display name, the user count
    on each side, and the added/removed username lists.
    """
    repo_before = before["repos"].get(repo_id)
    repo_after = after_repo_for_id(repo_id)
    gained, lost = _sorted_username_diff_values(
        _repo_usernames(repo_before),
        _repo_usernames(repo_after),
    )
    entry: RepositoryPermissionDiffEntry = {
        "id": id_codec.decode_repository_id(repo_id),
        "name": _snapshot_diff_repo_name(before, after_repo_for_id, repo_id),
        "before_count": _permission_count(repo_before),
        "after_count": _permission_count(repo_after),
        "added": gained,
        "removed": lost,
    }
    return entry
1050
+
1051
+
1052
def _snapshot_diff_summary(plan: _SnapshotDiffPlan) -> SnapshotDiffSummary:
    """Reduce a diff plan to its headline counts."""
    repos_changed = len(plan.changed_repo_ids)
    pending_added = len(plan.pending_added)
    pending_removed = len(plan.pending_removed)
    summary: SnapshotDiffSummary = {
        "repos_changed": repos_changed,
        "grants_added": plan.grants_added,
        "grants_removed": plan.grants_removed,
        "pending_bindIDs_added": pending_added,
        "pending_bindIDs_removed": pending_removed,
    }
    return summary
1060
+
1061
+
1062
def _snapshot_diff_pending_bind_ids(
    plan: _SnapshotDiffPlan,
) -> SnapshotDiffPendingBindIDs:
    """Expose the plan's pending bindID changes as an added/removed mapping.

    The plan's own lists are referenced, not copied.
    """
    pending: SnapshotDiffPendingBindIDs = {
        "added": plan.pending_added,
        "removed": plan.pending_removed,
    }
    return pending
1066
+
1067
+
1068
def build_snapshot_diff(before: Snapshot, after: Snapshot) -> SnapshotDiff:
    """Return a compact JSON-serializable diff between two full snapshots.

    Every repo ID found in either snapshot is compared; only repos whose user
    lists differ appear under ``"repos"``.
    """
    lookup_after = after["repos"].get
    all_repo_ids = set(before["repos"]) | set(after["repos"])
    plan = _plan_snapshot_diff(before, after, all_repo_ids, lookup_after)

    repo_entries = []
    for repo_id in plan.changed_repo_ids:
        repo_entries.append(_snapshot_diff_entry(before, lookup_after, repo_id))

    document: SnapshotDiff = {
        "schema_version": SNAPSHOT_DIFF_SCHEMA_VERSION,
        "diff_kind": "repo_permissions",
        "before": _snapshot_diff_side(before),
        "after": _snapshot_diff_side(after),
        "summary": _snapshot_diff_summary(plan),
        "pending_bindIDs": _snapshot_diff_pending_bind_ids(plan),
        "repos": repo_entries,
    }
    return document
1090
+
1091
+
1092
def _write_snapshot_diff_entry(
    output: TextIO,
    entry: RepositoryPermissionDiffEntry,
    indent: int,
) -> None:
    """Write one repo diff entry to ``output`` as a multi-line JSON object.

    Fields are indented by ``indent + 2`` spaces and the closing brace by
    ``indent`` spaces. The scalar fields come first, one per line, followed
    by the ``added`` and ``removed`` lists rendered by ``_write_string_list``.
    Nothing is written before the opening brace or after the closing one.
    """
    pad = " " * (indent + 2)
    scalar_fields: tuple[tuple[str, object], ...] = (
        ("id", entry["id"]),
        ("name", entry["name"]),
        ("before_count", entry["before_count"]),
        ("after_count", entry["after_count"]),
    )
    scalar_block = ",\n".join(
        f"{pad}{json.dumps(label)}: {json.dumps(value)}" for label, value in scalar_fields
    )
    output.write("{\n" + scalar_block + ",\n")
    output.write(f'{pad}"added": ')
    _write_string_list(output, entry["added"], indent + 2)
    output.write(",\n")
    output.write(f'{pad}"removed": ')
    _write_string_list(output, entry["removed"], indent + 2)
    output.write("\n" + " " * indent + "}")
1117
+
1118
+
1119
def _write_snapshot_diff_json(
    path: Path,
    before: Snapshot,
    after: Snapshot,
    plan: _SnapshotDiffPlan,
    after_repo_for_id: Callable[[str], RepoSnapshot | None],
) -> int:
    """Stream the repo-permissions diff document to ``path``.

    The header fields are written first, then one entry per changed repo.
    Each entry is built and written in turn, so the full list of entries is
    never held in memory.

    Returns:
        The size of the written file in bytes, measured after it is closed.
    """
    # NOTE(review): the whitespace inside the "\n " / "repos" literals below is
    # copied from a rendered view that may have collapsed runs of spaces;
    # confirm against the packaged file before relying on the exact indent.
    with path.open("w", encoding="utf-8") as output:
        output.write("{\n")
        header_fields: tuple[tuple[str, object], ...] = (
            ("schema_version", SNAPSHOT_DIFF_SCHEMA_VERSION),
            ("diff_kind", "repo_permissions"),
            ("before", _snapshot_diff_side(before)),
            ("after", _snapshot_diff_side(after)),
            ("summary", _snapshot_diff_summary(plan)),
            ("pending_bindIDs", _snapshot_diff_pending_bind_ids(plan)),
        )
        for position, (field_name, value) in enumerate(header_fields):
            _write_top_level_json_field(output, field_name, value, first=position == 0)

        output.write(',\n "repos": [')
        for position, repo_id in enumerate(plan.changed_repo_ids):
            # Every entry after the first is preceded by a comma.
            output.write(",\n " if position else "\n ")
            _write_snapshot_diff_entry(
                output,
                _snapshot_diff_entry(before, after_repo_for_id, repo_id),
                4,
            )
        # An empty list closes on the same line as its opening bracket.
        output.write("\n ]" if plan.changed_repo_ids else "]")
        output.write("\n}\n")
    return path.stat().st_size
1159
+
1160
+
1161
def write_snapshot_diff_from_snapshot_parts(
    path: Path,
    before: Snapshot,
    after: Snapshot,
    repo_ids: Iterable[str],
    after_repo_for_id: Callable[[str], RepoSnapshot | None],
) -> None:
    """Persist a full-snapshot diff without materializing every repo diff.

    The diff plan is computed first; the write then runs inside a
    ``disk_io`` event (``op="write"``, ``file_kind="snapshot_diff"``) that
    records the resulting file size. Missing parent directories of ``path``
    are created.
    """
    plan = _plan_snapshot_diff(before, after, repo_ids, after_repo_for_id)
    with src.event(
        "disk_io",
        level="DEBUG",
        op="write",
        path=str(path),
        file_kind="snapshot_diff",
    ) as disk_event:
        path.parent.mkdir(parents=True, exist_ok=True)
        written_bytes = _write_snapshot_diff_json(path, before, after, plan, after_repo_for_id)
        disk_event["bytes"] = written_bytes
1185
+
1186
+
1187
def write_snapshot_diff_from_snapshots(path: Path, before: Snapshot, after: Snapshot) -> None:
    """Persist a compact diff between two full snapshots.

    Every repo ID found in either snapshot is compared, with the "after"
    state looked up directly in ``after["repos"]``.
    """
    all_repo_ids = set(before["repos"]) | set(after["repos"])
    lookup_after = after["repos"].get
    write_snapshot_diff_from_snapshot_parts(path, before, after, all_repo_ids, lookup_after)
1196
+
1197
+
1198
def write_snapshot_diff(path: Path, diff: SnapshotDiff) -> None:
    """Persist a compact full-snapshot diff as pretty-printed JSON.

    The write runs inside a ``disk_io`` event (``op="write"``,
    ``file_kind="snapshot_diff"``) that records the value returned by
    ``_write_pretty_json`` as its byte count. Missing parent directories of
    ``path`` are created.
    """
    with src.event(
        "disk_io",
        level="DEBUG",
        op="write",
        path=str(path),
        file_kind="snapshot_diff",
    ) as disk_event:
        path.parent.mkdir(parents=True, exist_ok=True)
        written_bytes = _write_pretty_json(path, diff)
        disk_event["bytes"] = written_bytes
1209
+
1210
+
1211
def build_user_scoped_snapshot_diff(
    before: UserScopedSnapshot,
    after: UserScopedSnapshot,
) -> UserScopedSnapshotDiff:
    """Return a compact JSON-serializable diff between two scoped snapshots.

    Users are visited in sorted username order. A user is included only when
    their set of explicit repository IDs differs between the two sides;
    added and removed repositories are ordered by repository name.
    """
    changed_users: list[UserScopedSnapshotDiffEntry] = []
    added_total = 0
    removed_total = 0
    for username in sorted(set(before["users"]) | set(after["users"])):
        user_before = before["users"].get(username)
        user_after = after["users"].get(username)
        repos_before = _repositories_by_id(user_before)
        repos_after = _repositories_by_id(user_after)
        ids_before = set(repos_before)
        ids_after = set(repos_after)
        added_ids = sorted(ids_after - ids_before, key=repos_after.__getitem__)
        removed_ids = sorted(ids_before - ids_after, key=repos_before.__getitem__)
        if not (added_ids or removed_ids):
            continue
        added_total += len(added_ids)
        removed_total += len(removed_ids)

        # The "after" record is the preferred source for the user's ID.
        identity_source = user_after if user_after is not None else user_before
        if identity_source is None:
            # Not expected: the username came from one of the two mappings.
            # The guard mainly narrows the type for the lookup below.
            continue
        changed_users.append(
            {
                "username": username,
                "id": identity_source["id"],
                "before_count": len(repos_before),
                "after_count": len(repos_after),
                "added_repositories": [
                    _snapshot_diff_repository(repo_id, repos_after[repo_id])
                    for repo_id in added_ids
                ],
                "removed_repositories": [
                    _snapshot_diff_repository(repo_id, repos_before[repo_id])
                    for repo_id in removed_ids
                ],
            }
        )

    document: UserScopedSnapshotDiff = {
        "schema_version": SNAPSHOT_DIFF_SCHEMA_VERSION,
        "diff_kind": "user_scoped_permissions",
        "before": _snapshot_diff_side(before),
        "after": _snapshot_diff_side(after),
        "summary": {
            "users_changed": len(changed_users),
            "grants_added": added_total,
            "grants_removed": removed_total,
        },
        "users": changed_users,
    }
    return document
1269
+
1270
+
1271
def write_user_scoped_snapshot_diff(path: Path, diff: UserScopedSnapshotDiff) -> None:
    """Persist a compact user-scoped snapshot diff as pretty-printed JSON.

    The write runs inside a ``disk_io`` event (``op="write"``,
    ``file_kind="user_scoped_snapshot_diff"``) that records the value
    returned by ``_write_pretty_json`` as its byte count. Missing parent
    directories of ``path`` are created.
    """
    with src.event(
        "disk_io",
        level="DEBUG",
        op="write",
        path=str(path),
        file_kind="user_scoped_snapshot_diff",
    ) as disk_event:
        path.parent.mkdir(parents=True, exist_ok=True)
        written_bytes = _write_pretty_json(path, diff)
        disk_event["bytes"] = written_bytes
1282
+
1283
+
1284
# Default cap on how many repos (or users) the text renderers list in full.
MAX_RENDERED_DIFF_ENTRIES = 50
# Default cap on how many usernames (or repositories) are listed per
# added/removed line.
MAX_RENDERED_DIFF_VALUES = 50
1286
+
1287
+
1288
def _render_limited_values(values: list[str], max_values: int) -> str:
    """Join ``values`` with ``", "``, showing at most ``max_values`` of them.

    When entries are left out, a ``... (N more)`` marker with the number of
    omitted entries is appended.

    Args:
        values: Strings to render, in display order.
        max_values: Maximum number of entries to show. Zero or a negative
            number shows none of them.

    Returns:
        The joined text; only the marker when nothing is shown; an empty
        string for empty ``values``.
    """
    # A negative limit would make the slice below count from the end and the
    # omitted count overshoot, so it is treated as "show nothing".
    limit = max(max_values, 0)
    if len(values) <= limit:
        return ", ".join(values)
    omitted_count = len(values) - limit
    if limit == 0:
        # Nothing visible: skip the separator that would otherwise lead.
        return f"... ({omitted_count} more)"
    return f"{', '.join(values[:limit])}, ... ({omitted_count} more)"
1294
+
1295
+
1296
def render_diff(
    diff: dict[str, RepoDiff],
    max_repos: int = MAX_RENDERED_DIFF_ENTRIES,
    max_usernames_per_section: int = MAX_RENDERED_DIFF_VALUES,
) -> str:
    """Format a diff dict as a human-readable multi-line string.

    Args:
        diff: Per-repo changes keyed by repo ID.
        max_repos: Maximum number of repos listed in full, in name order.
            Zero or a negative number lists none.
        max_usernames_per_section: Maximum usernames shown per added/removed
            line.

    Returns:
        ``"No changes."`` for an empty diff. Otherwise the listed repos, a
        note about omitted repos (if any), a blank line, and a summary whose
        totals cover every repo in ``diff``, listed or not.
    """
    if not diff:
        return "No changes."

    # A negative cap would slice from the end and inflate the omitted count.
    visible_limit = max(max_repos, 0)
    ordered = sorted(diff.items(), key=lambda item: item[1]["name"])
    total_added = sum(len(repo_diff["added"]) for repo_diff in diff.values())
    total_removed = sum(len(repo_diff["removed"]) for repo_diff in diff.values())

    lines: list[str] = []
    for repo_id, repo_diff in ordered[:visible_limit]:
        lines.append(f"=== {repo_diff['name']} (id={id_codec.decode_repository_id(repo_id)}) ===")
        added = repo_diff["added"]
        removed = repo_diff["removed"]
        if added:
            rendered = _render_limited_values(added, max_usernames_per_section)
            lines.append(f" + added ({len(added)}): {rendered}")
        if removed:
            rendered = _render_limited_values(removed, max_usernames_per_section)
            lines.append(f" - removed ({len(removed)}): {rendered}")

    omitted_repos = len(ordered) - visible_limit
    if omitted_repos > 0:
        lines.append(
            f"... {omitted_repos} more repo(s) omitted from log output; "
            "see diff.json for full added/removed lists."
        )
    lines.append("")
    lines.append(
        f"Summary: {len(diff)} repo(s) changed; "
        f"{total_added} grant(s) added, {total_removed} grant(s) removed."
    )
    return "\n".join(lines)
1342
+
1343
+
1344
def render_snapshot_diff_from_snapshot_parts(
    before: Snapshot,
    after: Snapshot,
    repo_ids: Iterable[str],
    after_repo_for_id: Callable[[str], RepoSnapshot | None],
    max_repos: int = MAX_RENDERED_DIFF_ENTRIES,
    max_usernames_per_section: int = MAX_RENDERED_DIFF_VALUES,
) -> str:
    """Format a capped human diff without materializing the full diff.

    Per-repo added/removed lists are built only for the repos actually
    listed; the summary totals come from the diff plan.

    Args:
        before: Snapshot supplying the "before" state.
        after: Snapshot supplying the "after" pending bindIDs.
        repo_ids: Candidate repo IDs to compare.
        after_repo_for_id: Lookup returning the "after" state of a repo, or
            ``None`` when there is none.
        max_repos: Maximum number of changed repos listed in full. Zero or a
            negative number lists none.
        max_usernames_per_section: Maximum usernames shown per added/removed
            line.

    Returns:
        ``"No changes."`` when no repo changed. Otherwise the listed repos, a
        note about omitted repos (if any), a blank line, and a summary line.
    """
    plan = _plan_snapshot_diff(before, after, repo_ids, after_repo_for_id)
    if not plan.changed_repo_ids:
        return "No changes."

    # A negative cap would slice from the end and inflate the omitted count.
    visible_limit = max(max_repos, 0)
    lines: list[str] = []
    for repo_id in plan.changed_repo_ids[:visible_limit]:
        entry = _snapshot_diff_entry(before, after_repo_for_id, repo_id)
        lines.append(f"=== {entry['name']} (id={entry['id']}) ===")
        added = entry["added"]
        removed = entry["removed"]
        if added:
            rendered = _render_limited_values(added, max_usernames_per_section)
            lines.append(f" + added ({len(added)}): {rendered}")
        if removed:
            rendered = _render_limited_values(removed, max_usernames_per_section)
            lines.append(f" - removed ({len(removed)}): {rendered}")

    omitted_repos = len(plan.changed_repo_ids) - visible_limit
    if omitted_repos > 0:
        lines.append(
            f"... {omitted_repos} more repo(s) omitted from log output; "
            "see diff.json for full added/removed lists."
        )
    lines.append("")
    lines.append(
        f"Summary: {len(plan.changed_repo_ids)} repo(s) changed; "
        f"{plan.grants_added} grant(s) added, {plan.grants_removed} grant(s) removed."
    )
    return "\n".join(lines)
1393
+
1394
+
1395
def render_snapshot_diff(
    before: Snapshot,
    after: Snapshot,
    max_repos: int = MAX_RENDERED_DIFF_ENTRIES,
    max_usernames_per_section: int = MAX_RENDERED_DIFF_VALUES,
) -> str:
    """Format a capped human diff between two full snapshots.

    Every repo ID found in either snapshot is compared, with the "after"
    state looked up directly in ``after["repos"]``.
    """
    all_repo_ids = set(before["repos"]) | set(after["repos"])
    lookup_after = after["repos"].get
    return render_snapshot_diff_from_snapshot_parts(
        before,
        after,
        all_repo_ids,
        lookup_after,
        max_repos,
        max_usernames_per_section,
    )
1410
+
1411
+
1412
def render_user_scoped_diff(
    before: UserScopedSnapshot,
    after: UserScopedSnapshot,
    max_users: int = MAX_RENDERED_DIFF_ENTRIES,
    max_repositories_per_section: int = MAX_RENDERED_DIFF_VALUES,
) -> str:
    """Format a user-scoped snapshot diff as human-readable text.

    Args:
        before: Snapshot supplying the "before" per-user repositories.
        after: Snapshot supplying the "after" per-user repositories.
        max_users: Maximum number of changed users listed in full, in
            username order. Zero or a negative number lists none.
        max_repositories_per_section: Maximum repository names shown per
            added/removed line.

    Returns:
        ``"No changes."`` when no user's repository set changed. Otherwise
        the listed users, a note about omitted users (if any), a blank line,
        and a summary whose totals cover every changed user, listed or not.
    """
    # A negative cap would inflate the omitted-user count below.
    visible_limit = max(max_users, 0)
    lines: list[str] = []
    total_added = 0
    total_removed = 0
    changed_users = 0
    for username in sorted(set(before["users"]) | set(after["users"])):
        before_repositories = _repositories_by_id(before["users"].get(username))
        after_repositories = _repositories_by_id(after["users"].get(username))
        before_ids = set(before_repositories)
        after_ids = set(after_repositories)
        added_ids = sorted(after_ids - before_ids, key=lambda repo_id: after_repositories[repo_id])
        removed_ids = sorted(
            before_ids - after_ids,
            key=lambda repo_id: before_repositories[repo_id],
        )
        if not added_ids and not removed_ids:
            continue
        changed_users += 1
        total_added += len(added_ids)
        total_removed += len(removed_ids)
        if changed_users > visible_limit:
            # Past the cap: still counted in the totals, just not listed.
            continue
        lines.append(f"=== {username} ===")
        if added_ids:
            rendered = _render_limited_values(
                [after_repositories[repo_id] for repo_id in added_ids],
                max_repositories_per_section,
            )
            lines.append(f" + added ({len(added_ids)}): {rendered}")
        if removed_ids:
            rendered = _render_limited_values(
                [before_repositories[repo_id] for repo_id in removed_ids],
                max_repositories_per_section,
            )
            lines.append(f" - removed ({len(removed_ids)}): {rendered}")

    # Decide on the number of changed users, not on the rendered lines: with
    # a cap of zero nothing is listed even though changes exist.
    if not changed_users:
        return "No changes."
    omitted_users = changed_users - visible_limit
    if omitted_users > 0:
        lines.append(
            f"... {omitted_users} more user(s) omitted from log output; "
            "see diff.json for full added/removed lists."
        )
    lines.append("")
    lines.append(f"Summary: {total_added} grant(s) added, {total_removed} grant(s) removed.")
    return "\n".join(lines)
1472
+
1473
+
1474
def _repositories_by_id(
    user_snapshot: UserScopedUserSnapshot | None,
) -> dict[str, str]:
    """Map repository ID to repository name for one user's explicit repos.

    Returns an empty dict for ``None``. If an ID occurs more than once, the
    last occurrence's name is kept.
    """
    names_by_id: dict[str, str] = {}
    if user_snapshot is not None:
        for repository in user_snapshot["explicit_repositories"]:
            names_by_id[repository["id"]] = repository["name"]
    return names_by_id
1483
+
1484
+
1485
def _permission_count(repo_snapshot: RepoSnapshot | None) -> int:
    """Return how many users the repo lists explicitly; 0 for ``None``."""
    return 0 if repo_snapshot is None else len(repo_snapshot["explicit_permissions_users"])
1489
+
1490
+
1491
def _snapshot_diff_side(snapshot: Snapshot | UserScopedSnapshot) -> SnapshotDiffSide:
    """Copy the snapshot's identifying metadata into a diff "side" record.

    Only the capture time, endpoint, bindID mode and config file/hash fields
    are carried over; every other key is left out.
    """
    side: SnapshotDiffSide = {
        "captured_at": snapshot["captured_at"],
        "endpoint": snapshot["endpoint"],
        "bindID_mode": snapshot["bindID_mode"],
        "config_file": snapshot["config_file"],
        "config_sha256": snapshot["config_sha256"],
    }
    return side
1499
+
1500
+
1501
def _snapshot_diff_repository(repo_id: str, repo_name: str) -> SnapshotDiffRepository:
    """Build a diff repository record from an encoded repo ID and its name.

    The ID is passed through ``id_codec.decode_repository_id`` before being
    stored.
    """
    decoded_id = id_codec.decode_repository_id(repo_id)
    record: SnapshotDiffRepository = {"id": decoded_id, "name": repo_name}
    return record