src-auth-perms-sync 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- src_auth_perms_sync/__init__.py +1 -0
- src_auth_perms_sync/__main__.py +6 -0
- src_auth_perms_sync/cli.py +646 -0
- src_auth_perms_sync/orgs/__init__.py +1 -0
- src_auth_perms_sync/orgs/command.py +7 -0
- src_auth_perms_sync/orgs/queries.py +44 -0
- src_auth_perms_sync/orgs/sync.py +1167 -0
- src_auth_perms_sync/orgs/types.py +103 -0
- src_auth_perms_sync/permissions/__init__.py +1 -0
- src_auth_perms_sync/permissions/apply.py +420 -0
- src_auth_perms_sync/permissions/command.py +918 -0
- src_auth_perms_sync/permissions/full_set.py +880 -0
- src_auth_perms_sync/permissions/mapping.py +627 -0
- src_auth_perms_sync/permissions/maps.py +291 -0
- src_auth_perms_sync/permissions/queries.py +180 -0
- src_auth_perms_sync/permissions/restore.py +913 -0
- src_auth_perms_sync/permissions/snapshot.py +1502 -0
- src_auth_perms_sync/permissions/sourcegraph.py +392 -0
- src_auth_perms_sync/permissions/types.py +116 -0
- src_auth_perms_sync/permissions/workflow.py +526 -0
- src_auth_perms_sync/shared/__init__.py +1 -0
- src_auth_perms_sync/shared/backups.py +119 -0
- src_auth_perms_sync/shared/id_codec.py +67 -0
- src_auth_perms_sync/shared/queries.py +65 -0
- src_auth_perms_sync/shared/run_context.py +34 -0
- src_auth_perms_sync/shared/saml_groups.py +267 -0
- src_auth_perms_sync/shared/site_config.py +366 -0
- src_auth_perms_sync/shared/sourcegraph.py +69 -0
- src_auth_perms_sync/shared/types.py +69 -0
- src_auth_perms_sync-0.2.1.dist-info/METADATA +256 -0
- src_auth_perms_sync-0.2.1.dist-info/RECORD +34 -0
- src_auth_perms_sync-0.2.1.dist-info/WHEEL +4 -0
- src_auth_perms_sync-0.2.1.dist-info/entry_points.txt +2 -0
- src_auth_perms_sync-0.2.1.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,1502 @@
|
|
|
1
|
+
"""Repo-permission snapshots: capture / diff / file I/O."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import datetime
|
|
6
|
+
import hashlib
|
|
7
|
+
import json
|
|
8
|
+
import logging
|
|
9
|
+
import time
|
|
10
|
+
from collections.abc import Callable, Iterable, Sequence
|
|
11
|
+
from concurrent.futures import FIRST_COMPLETED, ThreadPoolExecutor, as_completed, wait
|
|
12
|
+
from dataclasses import dataclass
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Any, Literal, TextIO, TypeAlias, TypedDict, cast
|
|
15
|
+
|
|
16
|
+
import src_py_lib as src
|
|
17
|
+
|
|
18
|
+
from ..shared import id_codec, run_context
|
|
19
|
+
from ..shared import types as shared_types
|
|
20
|
+
from . import sourcegraph as permissions_sourcegraph
|
|
21
|
+
from . import types as permission_types
|
|
22
|
+
|
|
23
|
+
# Module-level logger named after this module; used for warnings and progress lines below.
log = logging.getLogger(__name__)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class RepoSnapshot(TypedDict):
    """One repository's entry in a full snapshot's `repos` map."""

    # Repository name, or a `<repository id=...>` placeholder when the name
    # could not be looked up.
    name: str
    # Usernames that hold an explicit grant on this repository.
    explicit_permissions_users: list[str]
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class SnapshotStats(TypedDict):
    """Aggregate counters stored under a full snapshot's `stats` key."""

    # Number of users submitted for capture.
    total_users_scanned: int
    # Distinct usernames that appear in at least one repository entry.
    users_with_explicit_grants: int
    # Number of repository entries in the snapshot.
    repos_with_explicit_grants: int
    # Total number of (repository, username) pairs.
    total_grants: int
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class Snapshot(TypedDict):
    """In-memory shape of a full repo-permission snapshot."""

    schema_version: int
    # Capture timestamp as an ISO-8601 string.
    captured_at: str
    endpoint: str
    # "USERNAME" or "EMAIL", from the GraphQL enum.
    bindID_mode: str
    # Absolute path of the YAML, if known.
    config_file: str | None
    # sha256 of the YAML at capture time.
    config_sha256: str | None
    pending_bindIDs: list[str]
    stats: SnapshotStats
    # Keyed by repository ID.
    repos: dict[str, RepoSnapshot]
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class SnapshotUser(TypedDict):
    """Minimal user record needed for snapshot capture."""

    # User ID passed to the explicit-grant queries.
    id: str
    # Username under which the user's grants are recorded.
    username: str
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
# Either a full user record or the compact `SnapshotUser` form; capture code reads only `id` and `username`.
SnapshotUserInput: TypeAlias = shared_types.User | SnapshotUser
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def compact_snapshot_users(users: Iterable[shared_types.User]) -> list[SnapshotUser]:
    """Reduce full user records to the `id` / `username` pair used by capture.

    Args:
        users: Any iterable of user mappings carrying `id` and `username`.

    Returns:
        A new list with one two-key dict per input user, in input order.
    """
    return [
        {
            "id": record["id"],
            "username": record["username"],
        }
        for record in users
    ]
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class UserScopedUserSnapshot(TypedDict):
    """One user's entry in a user-scoped snapshot's `users` map."""

    # User ID.
    id: str
    # Repositories on which the user holds an explicit grant.
    explicit_repositories: list[permission_types.Repository]
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class UserScopedSnapshotStats(TypedDict):
    """Aggregate counters stored under a user-scoped snapshot's `stats` key."""

    # Number of users captured.
    total_users_scanned: int
    # Users with at least one explicit repository.
    users_with_explicit_grants: int
    # Sum of explicit repositories over all users.
    total_grants: int
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class UserScopedSnapshot(TypedDict):
    """In-memory shape of a snapshot restricted to a set of users."""

    schema_version: int
    # Discriminator that marks this as a user-scoped snapshot.
    snapshot_kind: Literal["user_scope"]
    # Capture timestamp as an ISO-8601 string.
    captured_at: str
    endpoint: str
    bindID_mode: str
    # Resolved path of the config file, when one was supplied.
    config_file: str | None
    # sha256 hex digest of the config file, when it existed at capture time.
    config_sha256: str | None
    stats: UserScopedSnapshotStats
    # Keyed by username.
    users: dict[str, UserScopedUserSnapshot]
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
class SnapshotDiffSide(TypedDict):
    """Metadata describing one side (`before` or `after`) of a snapshot diff.

    The fields mirror the metadata fields of the snapshot types.
    """

    captured_at: str
    endpoint: str
    bindID_mode: str
    config_file: str | None
    config_sha256: str | None
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
class SnapshotDiffPendingBindIDs(TypedDict):
    """Pending bindID changes reported under a diff's `pending_bindIDs` key."""

    # bindIDs listed as added.
    added: list[str]
    # bindIDs listed as removed.
    removed: list[str]
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
class SnapshotDiffSummary(TypedDict):
    """Headline counters stored under a repo-permission diff's `summary` key."""

    repos_changed: int
    grants_added: int
    grants_removed: int
    pending_bindIDs_added: int
    pending_bindIDs_removed: int
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
class RepositoryPermissionDiffEntry(TypedDict):
    """One per-repository entry in a `SnapshotDiff`'s `repos` list."""

    # Integer repository ID (presumably the decoded form -- confirm against the producer).
    id: int
    name: str
    before_count: int
    after_count: int
    # String entries listed as added / removed (presumably usernames -- confirm).
    added: list[str]
    removed: list[str]
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
class SnapshotDiff(TypedDict):
    """Top-level shape of a repo-permission diff document."""

    schema_version: int
    # Discriminator that marks this as a repo-permission diff.
    diff_kind: Literal["repo_permissions"]
    before: SnapshotDiffSide
    after: SnapshotDiffSide
    summary: SnapshotDiffSummary
    pending_bindIDs: SnapshotDiffPendingBindIDs
    repos: list[RepositoryPermissionDiffEntry]
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
class SnapshotDiffRepository(TypedDict):
    """Repository reference used inside user-scoped diff entries."""

    # Integer repository ID.
    id: int
    # Repository name.
    name: str
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
class UserScopedSnapshotDiffSummary(TypedDict):
    """Headline counters stored under a user-scoped diff's `summary` key."""

    users_changed: int
    grants_added: int
    grants_removed: int
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
class UserScopedSnapshotDiffEntry(TypedDict):
    """One per-user entry in a `UserScopedSnapshotDiff`'s `users` list."""

    username: str
    id: str
    before_count: int
    after_count: int
    # Repositories listed as added / removed for this user.
    added_repositories: list[SnapshotDiffRepository]
    removed_repositories: list[SnapshotDiffRepository]
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
class UserScopedSnapshotDiff(TypedDict):
    """Top-level shape of a user-scoped permission diff document."""

    schema_version: int
    # Discriminator that marks this as a user-scoped diff.
    diff_kind: Literal["user_scoped_permissions"]
    before: SnapshotDiffSide
    after: SnapshotDiffSide
    summary: UserScopedSnapshotDiffSummary
    users: list[UserScopedSnapshotDiffEntry]
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
# Schema version stamped into both full and user-scoped snapshots.
SNAPSHOT_SCHEMA_VERSION: int = 3
# Value of `UserScopedSnapshot.snapshot_kind`. Annotated as a Literal so type
# checkers do not widen it to `str` where it fills that Literal-typed field.
USER_SCOPED_SNAPSHOT_KIND: Literal["user_scope"] = "user_scope"
# Schema version for snapshot diff documents.
SNAPSHOT_DIFF_SCHEMA_VERSION: int = 1
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def capture_explicit_grants(
    client: src.SourcegraphClient,
    users: Iterable[SnapshotUserInput],
    parallelism: int,
    explicit_permissions_batch_size: int,
    total_users: int | None = None,
    worker_pool: ThreadPoolExecutor | None = None,
) -> tuple[dict[str, RepoSnapshot], int]:
    """Collect explicit-API grants per user and invert them into a per-repo index.

    Users are grouped into batches of `explicit_permissions_batch_size` and
    each batch is fetched on a worker thread. A batch whose bulk query fails
    is retried one user at a time; users that still fail are logged and
    treated as having no grants. Results are folded into a
    `repository id -> usernames` map as batches finish, and repository names
    are looked up once per unique repository after all users are done.

    `users` may be any iterable, including a lazily paginated one: batches
    are submitted while it is being consumed, so time spent waiting for the
    next page of users overlaps with fetches already in flight. At most
    `max(1, parallelism * 2)` batches are kept pending at a time.

    Args:
        client: Sourcegraph client used for every query.
        users: Users to scan; only `id` and `username` are read.
        parallelism: Worker count handed to `run_context.thread_pool`.
        explicit_permissions_batch_size: Number of users per batch.
        total_users: Optional expected user count. When truthy, progress
            lines carry a percentage and ETA and fire about every 10%;
            otherwise they fire every 1000 completed users.
        worker_pool: Optional executor handed to `run_context.thread_pool`.

    Returns:
        `(repos, user_count)`: the per-repository snapshot entries keyed by
        repository ID (usernames sorted within each entry) and the number of
        users that were submitted for capture.
    """
    # Only repository IDs are stored while fetching; names are hydrated once
    # per unique repository at the end.
    grantees_by_repo_id: dict[str, list[str]] = {}

    def _fetch_one_user_at_a_time(
        members: list[SnapshotUserInput],
    ) -> tuple[dict[str, list[str]], int]:
        # Fallback path: one query per user, counting the users that fail.
        ids_by_user_id: dict[str, list[str]] = {}
        failed = 0
        for member in members:
            try:
                ids_by_user_id[member["id"]] = permissions_sourcegraph.list_user_explicit_repo_ids(
                    client,
                    member["id"],
                )
            except Exception as error:
                failed += 1
                log.warning(
                    "Failed to fetch explicit grants for user=%s: %s",
                    member["username"],
                    error,
                )
                ids_by_user_id[member["id"]] = []
        return ids_by_user_id, failed

    def _fetch(
        members: list[SnapshotUserInput],
    ) -> tuple[dict[str, list[str]], int]:
        # One event per user batch, so it is logged at DEBUG with the success
        # status fields omitted. User IDs are left out of the event because
        # usernames are more readable.
        with src.event(
            "user_explicit_repos_batch_fetch",
            level="DEBUG",
            omit_success_status=True,
            user_count=len(members),
        ) as batch_event:
            try:
                ids_by_user_id = permissions_sourcegraph.list_users_explicit_repo_ids(
                    client,
                    [member["id"] for member in members],
                    batch_size=explicit_permissions_batch_size,
                )
            except Exception as error:
                log.warning(
                    "Failed to batch-fetch explicit grants for %d user(s): %s. "
                    "Falling back to one query per user.",
                    len(members),
                    error,
                )
                ids_by_user_id, failed = _fetch_one_user_at_a_time(members)
            else:
                failed = 0
            ids_by_username = {
                member["username"]: ids_by_user_id.get(member["id"], []) for member in members
            }
            batch_event["repo_count"] = sum(map(len, ids_by_username.values()))
            batch_event["per_user_failures"] = failed
            return ids_by_username, failed

    with src.event(
        "capture_explicit_grants",
        total_users=total_users,
        explicit_permissions_batch_size=explicit_permissions_batch_size,
    ) as capture_event:
        failure_total = 0
        in_flight: dict[Any, list[SnapshotUserInput]] = {}
        submitted_total = 0
        max_pending_batches = max(1, parallelism * 2)

        def _submit_batch(
            executor: ThreadPoolExecutor,
            members: list[SnapshotUserInput],
        ) -> None:
            nonlocal submitted_total
            if members:
                # Copy so the caller can start a fresh batch list.
                frozen_batch = list(members)
                submitted_total += len(frozen_batch)
                in_flight[src.submit_with_log_context(executor, _fetch, frozen_batch)] = (
                    frozen_batch
                )

        # Report every 10% when the total is known, every 1000 users otherwise.
        report_every = max(1, total_users // 10) if total_users else 1000
        # The clock starts before submission: workers make progress while the
        # submit loop is blocked on the user iterator, so the first progress
        # line should show real elapsed time.
        started_at = time.perf_counter()
        finished_total = 0
        next_report_at = report_every
        submission_finished = False

        def _log_progress() -> None:
            elapsed = time.perf_counter() - started_at
            rate = finished_total / elapsed if elapsed > 0 else 0.0
            if not total_users:
                log.info(
                    "Captured explicit permissions for %d users in %.0fs (%.0f users/sec).",
                    finished_total,
                    elapsed,
                    rate,
                )
                return
            remaining = max(total_users - finished_total, 0)
            eta_seconds = remaining / rate if rate > 0 else 0.0
            log.info(
                "Captured explicit permissions for %d / %d users (%.0f%%) "
                "in %.0fs (%.0f users/sec, ETA %.0fs).",
                finished_total,
                total_users,
                100.0 * finished_total / total_users,
                elapsed,
                rate,
                eta_seconds,
            )

        def _record_completed_futures(done_futures: Iterable[Any]) -> None:
            nonlocal failure_total, finished_total, next_report_at
            for finished in done_futures:
                members = in_flight.pop(finished)
                finished_total += len(members)
                try:
                    ids_by_username, failed = finished.result()
                    failure_total += failed
                    for username, repo_ids in ids_by_username.items():
                        for repo_id in repo_ids:
                            grantees_by_repo_id.setdefault(repo_id, []).append(username)
                except Exception as error:
                    # Keep the capture going; the warning tells the operator
                    # which batch had its grants treated as empty.
                    failure_total += len(members)
                    log.warning(
                        "Failed to fetch explicit grants for %d user(s): %s",
                        len(members),
                        error,
                    )

                is_final = submission_finished and finished_total == submitted_total
                if finished_total >= next_report_at or is_final:
                    _log_progress()
                    while next_report_at <= finished_total:
                        next_report_at += report_every

        # Submit while iterating: pulling from `users` may block on a page of
        # results, and during that time workers keep draining submitted batches.
        with run_context.thread_pool(parallelism, worker_pool) as executor:
            current_batch: list[SnapshotUserInput] = []
            for user in users:
                current_batch.append(user)
                if len(current_batch) < explicit_permissions_batch_size:
                    continue
                _submit_batch(executor, current_batch)
                current_batch = []
                if len(in_flight) >= max_pending_batches:
                    done_now, _ = wait(in_flight, return_when=FIRST_COMPLETED)
                    _record_completed_futures(done_now)
            # Flush the trailing partial batch, if any.
            _submit_batch(executor, current_batch)
            submission_finished = True

            while in_flight:
                done_now, _ = wait(in_flight, return_when=FIRST_COMPLETED)
                _record_completed_futures(done_now)
        capture_event["user_count"] = submitted_total
        capture_event["per_user_failures"] = failure_total
        capture_event["max_pending_batches"] = max_pending_batches

    # Users are kept in alphabetical order within each repository.
    for grantees in grantees_by_repo_id.values():
        grantees.sort()

    with src.event(
        "hydrate_explicit_repository_names",
        repository_count=len(grantees_by_repo_id),
    ) as hydrate_event:
        repositories_by_id = permissions_sourcegraph.list_repositories_by_ids(
            client,
            grantees_by_repo_id.keys(),
        )
        hydrate_event["hydrated_repository_count"] = len(repositories_by_id)

    repos_out: dict[str, RepoSnapshot] = {
        repo_id: {
            "name": _snapshot_repository_name(repositories_by_id, repo_id),
            "explicit_permissions_users": grantees,
        }
        for repo_id, grantees in grantees_by_repo_id.items()
    }

    return repos_out, submitted_total
|
|
396
|
+
|
|
397
|
+
|
|
398
|
+
def _snapshot_repository_name(
|
|
399
|
+
repositories_by_id: dict[str, permission_types.Repository],
|
|
400
|
+
repository_id: str,
|
|
401
|
+
) -> str:
|
|
402
|
+
repository = repositories_by_id.get(repository_id)
|
|
403
|
+
if repository is not None:
|
|
404
|
+
return repository["name"]
|
|
405
|
+
try:
|
|
406
|
+
decoded_repository_id = id_codec.decode_repository_id(repository_id)
|
|
407
|
+
return f"<repository id={decoded_repository_id}>"
|
|
408
|
+
except ValueError:
|
|
409
|
+
return f"<repository id={repository_id}>"
|
|
410
|
+
|
|
411
|
+
|
|
412
|
+
def build_snapshot(
    client: src.SourcegraphClient,
    users: Iterable[SnapshotUserInput],
    parallelism: int,
    bind_id_mode: str,
    config_path: Path | None = None,
    *,
    total_users: int | None = None,
    explicit_permissions_batch_size: int,
    worker_pool: ThreadPoolExecutor | None = None,
) -> Snapshot:
    """Assemble a full `Snapshot`: explicit grants, pending bindIDs and metadata.

    Iteration over `users` is delegated to `capture_explicit_grants`, so a
    streaming iterator is consumed while batches are already being fetched.

    Args:
        client: Sourcegraph client; its `endpoint` is recorded in the snapshot.
        users: Users to scan.
        parallelism: Worker count for the capture.
        bind_id_mode: Recorded verbatim as `bindID_mode`.
        config_path: Optional config file. Its resolved path is recorded, and
            its sha256 too when the file exists.
        total_users: Optional expected user count, forwarded to the capture
            for percentage and ETA progress lines.
        explicit_permissions_batch_size: Users per capture batch.
        worker_pool: Optional executor forwarded to the capture.

    Returns:
        The snapshot dict, with `repos` ordered by repository ID and
        `pending_bindIDs` sorted.
    """
    with src.event("build_snapshot", bind_id_mode=bind_id_mode) as build_event:
        repos, user_count = capture_explicit_grants(
            client,
            users,
            parallelism,
            explicit_permissions_batch_size,
            total_users=total_users,
            worker_pool=worker_pool,
        )
        pending = permissions_sourcegraph.list_pending_bind_ids(client)

        # Hash the config only when a path was given and the file is present.
        config_sha: str | None = None
        if config_path is not None and config_path.exists():
            config_sha = hashlib.sha256(config_path.read_bytes()).hexdigest()

        grant_lists = [repo["explicit_permissions_users"] for repo in repos.values()]
        distinct_users: set[str] = {username for grants in grant_lists for username in grants}
        total_grants = sum(len(grants) for grants in grant_lists)
        users_with_grants = len(distinct_users)
        repo_total = len(repos)

        build_event["user_count"] = user_count
        build_event["repos_with_explicit_grants"] = repo_total
        build_event["users_with_explicit_grants"] = users_with_grants
        build_event["total_grants"] = total_grants
        build_event["pending_bindIDs_count"] = len(pending)

        stats: SnapshotStats = {
            "total_users_scanned": user_count,
            "users_with_explicit_grants": users_with_grants,
            "repos_with_explicit_grants": repo_total,
            "total_grants": total_grants,
        }
        return {
            "schema_version": SNAPSHOT_SCHEMA_VERSION,
            "captured_at": datetime.datetime.now(datetime.UTC).isoformat(timespec="seconds"),
            "endpoint": client.endpoint,
            "bindID_mode": bind_id_mode,
            "config_file": None if config_path is None else str(config_path.resolve()),
            "config_sha256": config_sha,
            "pending_bindIDs": sorted(pending),
            "stats": stats,
            # Ordered by repository ID so the written file is stable.
            "repos": dict(sorted(repos.items())),
        }
|
|
476
|
+
|
|
477
|
+
|
|
478
|
+
def capture_user_scoped_explicit_grants(
    client: src.SourcegraphClient,
    users: Iterable[SnapshotUser],
    parallelism: int,
    worker_pool: ThreadPoolExecutor | None = None,
) -> dict[str, UserScopedUserSnapshot]:
    """Fetch explicit API grants for exactly the given users.

    One fetch per user is submitted to the thread pool. A user whose fetch
    raises is logged and recorded with an empty repository list.

    Args:
        client: Sourcegraph client used for the per-user queries.
        users: Users to capture; `id` and `username` are read.
        parallelism: Worker count handed to `run_context.thread_pool`.
        worker_pool: Optional executor handed to `run_context.thread_pool`.

    Returns:
        A dict keyed by username, in sorted key order, whose values carry the
        user ID and that user's repositories sorted by name.
    """
    by_username: dict[str, UserScopedUserSnapshot] = {}

    def _fetch(user: SnapshotUser) -> tuple[SnapshotUser, list[permission_types.Repository]]:
        with src.event(
            "user_scoped_explicit_repos_fetch",
            level="DEBUG",
            omit_success_status=True,
            username=user["username"],
        ) as fetch_event:
            repositories = permissions_sourcegraph.list_user_explicit_repos(client, user["id"])
            fetch_event["repo_count"] = len(repositories)
            return user, repositories

    with src.event("capture_user_scoped_explicit_grants") as capture_event:
        with run_context.thread_pool(parallelism, worker_pool) as executor:
            pending: dict[Any, SnapshotUser] = {
                src.submit_with_log_context(executor, _fetch, user): user for user in users
            }
            for finished in as_completed(pending):
                requested = pending[finished]
                try:
                    resolved_user, repositories = finished.result()
                except Exception as error:
                    log.warning(
                        "Failed to fetch scoped explicit grants for user=%s: %s",
                        requested["username"],
                        error,
                    )
                    # Record the user anyway, with no repositories.
                    resolved_user, repositories = requested, []
                by_username[resolved_user["username"]] = {
                    "id": resolved_user["id"],
                    "explicit_repositories": sorted(
                        repositories,
                        key=lambda repository: repository["name"],
                    ),
                }
        capture_event["user_count"] = len(by_username)
        capture_event["total_grants"] = sum(
            len(entry["explicit_repositories"]) for entry in by_username.values()
        )
    return dict(sorted(by_username.items()))
|
|
525
|
+
|
|
526
|
+
|
|
527
|
+
def build_user_scoped_snapshot(
    client: src.SourcegraphClient,
    users: Iterable[SnapshotUser],
    parallelism: int,
    bind_id_mode: str,
    config_path: Path | None = None,
    worker_pool: ThreadPoolExecutor | None = None,
) -> UserScopedSnapshot:
    """Assemble a `UserScopedSnapshot` covering only the given users.

    Args:
        client: Sourcegraph client; its `endpoint` is recorded in the snapshot.
        users: Users to capture.
        parallelism: Worker count for the capture.
        bind_id_mode: Recorded verbatim as `bindID_mode`.
        config_path: Optional config file. Its resolved path is recorded, and
            its sha256 too when the file exists.
        worker_pool: Optional executor forwarded to the capture.

    Returns:
        The user-scoped snapshot dict, with `users` keyed by username.
    """
    with src.event("build_user_scoped_snapshot", bind_id_mode=bind_id_mode) as build_event:
        scoped_users = capture_user_scoped_explicit_grants(
            client,
            users,
            parallelism,
            worker_pool=worker_pool,
        )

        # Hash the config only when a path was given and the file is present.
        config_sha: str | None = None
        if config_path is not None and config_path.exists():
            config_sha = hashlib.sha256(config_path.read_bytes()).hexdigest()

        grant_counts = [len(entry["explicit_repositories"]) for entry in scoped_users.values()]
        total_grants = sum(grant_counts)
        users_with_explicit_grants = sum(1 for count in grant_counts if count)
        scanned = len(scoped_users)

        build_event["user_count"] = scanned
        build_event["users_with_explicit_grants"] = users_with_explicit_grants
        build_event["total_grants"] = total_grants

        stats: UserScopedSnapshotStats = {
            "total_users_scanned": scanned,
            "users_with_explicit_grants": users_with_explicit_grants,
            "total_grants": total_grants,
        }
        return {
            "schema_version": SNAPSHOT_SCHEMA_VERSION,
            "snapshot_kind": USER_SCOPED_SNAPSHOT_KIND,
            "captured_at": datetime.datetime.now(datetime.UTC).isoformat(timespec="seconds"),
            "endpoint": client.endpoint,
            "bindID_mode": bind_id_mode,
            "config_file": None if config_path is None else str(config_path.resolve()),
            "config_sha256": config_sha,
            "stats": stats,
            "users": scoped_users,
        }
|
|
572
|
+
|
|
573
|
+
|
|
574
|
+
def _write_pretty_json(path: Path, value: Any) -> int:
|
|
575
|
+
"""Write pretty JSON without materializing the encoded string first."""
|
|
576
|
+
with path.open("w", encoding="utf-8") as output:
|
|
577
|
+
json.dump(value, output, indent=2, sort_keys=False)
|
|
578
|
+
output.write("\n")
|
|
579
|
+
return path.stat().st_size
|
|
580
|
+
|
|
581
|
+
|
|
582
|
+
def _write_top_level_json_field(
|
|
583
|
+
output: TextIO,
|
|
584
|
+
name: str,
|
|
585
|
+
value: object,
|
|
586
|
+
*,
|
|
587
|
+
first: bool,
|
|
588
|
+
) -> None:
|
|
589
|
+
if not first:
|
|
590
|
+
output.write(",\n")
|
|
591
|
+
output.write(f" {json.dumps(name)}: ")
|
|
592
|
+
output.write(json.dumps(value, indent=2).replace("\n", "\n "))
|
|
593
|
+
|
|
594
|
+
|
|
595
|
+
def _write_string_list(output: TextIO, values: Sequence[str], indent: int) -> None:
|
|
596
|
+
if not values:
|
|
597
|
+
output.write("[]")
|
|
598
|
+
return
|
|
599
|
+
output.write("[\n")
|
|
600
|
+
value_indent = " " * (indent + 2)
|
|
601
|
+
for index, value in enumerate(values):
|
|
602
|
+
if index:
|
|
603
|
+
output.write(",\n")
|
|
604
|
+
output.write(value_indent)
|
|
605
|
+
json.dump(value, output)
|
|
606
|
+
output.write("\n" + " " * indent + "]")
|
|
607
|
+
|
|
608
|
+
|
|
609
|
+
def _write_repo_snapshot_value(output: TextIO, repo: RepoSnapshot, indent: int) -> None:
    """Write one `RepoSnapshot` as a two-member JSON object.

    Members are indented by `indent + 2` spaces and the closing brace by
    `indent` spaces. The user list is written by `_write_string_list`.
    """
    member_indent = indent + 2
    pad = " " * member_indent
    output.write("{\n")
    output.write(pad + '"name": ')
    json.dump(repo["name"], output)
    output.write(",\n")
    output.write(pad + '"explicit_permissions_users": ')
    _write_string_list(output, repo["explicit_permissions_users"], member_indent)
    output.write("\n" + " " * indent + "}")
|
|
618
|
+
|
|
619
|
+
|
|
620
|
+
def _write_repository_value(output: TextIO, repository: permission_types.Repository) -> None:
    """Write a repository as a single-line `{"id": ..., "name": ...}` object.

    The `id` is the result of `id_codec.decode_repository_id` applied to the
    repository's stored ID.
    """
    output.write("{" + '"id": ')
    decoded_id = id_codec.decode_repository_id(repository["id"])
    json.dump(decoded_id, output)
    output.write(', "name": ')
    json.dump(repository["name"], output)
    output.write("}")
|
|
627
|
+
|
|
628
|
+
|
|
629
|
+
def _write_repository_list(
    output: TextIO,
    repositories: Sequence[permission_types.Repository],
    indent: int,
) -> None:
    """Write `repositories` as a JSON array with one repository per line.

    An empty sequence is written as `[]`. Otherwise elements are indented by
    `indent + 2` spaces and the closing bracket by `indent` spaces.
    """
    if not repositories:
        output.write("[]")
        return
    element_pad = " " * (indent + 2)
    output.write("[\n")
    separator = ""
    for entry in repositories:
        if separator:
            output.write(separator)
        output.write(element_pad)
        _write_repository_value(output, entry)
        separator = ",\n"
    output.write("\n" + " " * indent + "]")
|
|
645
|
+
|
|
646
|
+
|
|
647
|
+
def _write_user_scoped_snapshot_value(
    output: TextIO,
    user_snapshot: UserScopedUserSnapshot,
    indent: int,
) -> None:
    """Write one `UserScopedUserSnapshot` as a two-member JSON object.

    Members are indented by `indent + 2` spaces and the closing brace by
    `indent` spaces. The repository list is written by
    `_write_repository_list`.
    """
    member_indent = indent + 2
    pad = " " * member_indent
    output.write("{\n")
    output.write(pad + '"id": ')
    json.dump(user_snapshot["id"], output)
    output.write(",\n")
    output.write(pad + '"explicit_repositories": ')
    _write_repository_list(output, user_snapshot["explicit_repositories"], member_indent)
    output.write("\n" + " " * indent + "}")
|
|
660
|
+
|
|
661
|
+
|
|
662
|
+
def _write_snapshot_json(
    path: Path,
    snapshot: Snapshot,
    repos: Iterable[tuple[str, RepoSnapshot]],
) -> int:
    """Stream a full snapshot to `path` and return the file size in bytes.

    Metadata members come from `snapshot`; the `repos` member is written from
    the `repos` iterable one entry at a time, each key being the string form
    of the decoded repository ID. This avoids building a second repo map just
    to change the keys.
    """
    with path.open("w", encoding="utf-8") as output:
        output.write("{\n")
        header_fields: tuple[tuple[str, object], ...] = (
            ("schema_version", snapshot["schema_version"]),
            ("captured_at", snapshot["captured_at"]),
            ("endpoint", snapshot["endpoint"]),
            ("bindID_mode", snapshot["bindID_mode"]),
            ("config_file", snapshot["config_file"]),
            ("config_sha256", snapshot["config_sha256"]),
            ("pending_bindIDs", snapshot["pending_bindIDs"]),
            ("stats", snapshot["stats"]),
        )
        for position, (field_name, value) in enumerate(header_fields):
            _write_top_level_json_field(
                output,
                field_name,
                value,
                first=position == 0,
            )

        output.write(',\n "repos": {')
        emitted_any = False
        for repo_id, repo in repos:
            # A comma separates entries; the first entry has none before it.
            output.write(",\n " if emitted_any else "\n ")
            json.dump(str(id_codec.decode_repository_id(repo_id)), output)
            output.write(": ")
            _write_repo_snapshot_value(output, repo, 4)
            emitted_any = True
        # An empty map closes on the same line as its opening brace.
        output.write("\n }" if emitted_any else "}")
        output.write("\n}\n")
    return path.stat().st_size
|
|
706
|
+
|
|
707
|
+
|
|
708
|
+
def write_snapshot_with_repos(
    path: Path,
    snapshot: Snapshot,
    repos: Iterable[tuple[str, RepoSnapshot]],
) -> None:
    """Write a full snapshot to `path`, taking repo entries from `repos`.

    Missing parent directories are created. The write is wrapped in a DEBUG
    `disk_io` event that records the number of bytes written.
    """
    event_fields = {
        "level": "DEBUG",
        "op": "write",
        "path": str(path),
        "file_kind": "snapshot",
    }
    with src.event("disk_io", **event_fields) as disk_event:
        path.parent.mkdir(parents=True, exist_ok=True)
        bytes_written = _write_snapshot_json(path, snapshot, repos)
        disk_event["bytes"] = bytes_written
|
|
723
|
+
|
|
724
|
+
|
|
725
|
+
def write_snapshot(path: Path, snapshot: Snapshot) -> None:
    """Write ``snapshot`` to ``path`` as pretty-printed JSON with stable ordering.

    This is a thin wrapper that feeds ``snapshot["repos"].items()`` to
    ``write_snapshot_with_repos``.

    On disk, repo IDs are stored as plain integer DB primary keys rather than
    in their opaque GraphQL Node form (`Repository:<int>` base64), which makes
    the file easier to grep, diff, and read by eye. `read_snapshot` re-encodes
    them on load, so every in-memory consumer of `Snapshot` keeps working with
    opaque IDs.
    """
    repo_entries = snapshot["repos"].items()
    write_snapshot_with_repos(path, snapshot, repo_entries)
|
|
735
|
+
|
|
736
|
+
|
|
737
|
+
def write_user_scoped_snapshot(path: Path, snapshot: UserScopedSnapshot) -> None:
    """Stream a user-scoped snapshot to ``path`` as JSON.

    The metadata fields are emitted first through
    ``_write_top_level_json_field``; a ``"users"`` object keyed by username
    follows, each value rendered by ``_write_user_scoped_snapshot_value``.
    Missing parent directories are created, and the final file size is stored
    on the surrounding ``disk_io`` event under ``"bytes"``.
    """
    io_event = src.event(
        "disk_io",
        level="DEBUG",
        op="write",
        path=str(path),
        file_kind="user_scoped_snapshot",
    )
    with io_event as disk_event:
        path.parent.mkdir(parents=True, exist_ok=True)
        with path.open("w", encoding="utf-8") as output:
            output.write("{\n")
            # Built eagerly so every metadata key is read before any field is written.
            header_fields: tuple[tuple[str, object], ...] = (
                ("schema_version", snapshot["schema_version"]),
                ("snapshot_kind", snapshot["snapshot_kind"]),
                ("captured_at", snapshot["captured_at"]),
                ("endpoint", snapshot["endpoint"]),
                ("bindID_mode", snapshot["bindID_mode"]),
                ("config_file", snapshot["config_file"]),
                ("config_sha256", snapshot["config_sha256"]),
                ("stats", snapshot["stats"]),
            )
            for position, (field_name, value) in enumerate(header_fields):
                _write_top_level_json_field(
                    output,
                    field_name,
                    value,
                    first=(position == 0),
                )

            # NOTE(review): the indent literals below are reproduced exactly as
            # found; the value writer is told to indent by 4 — confirm they line up.
            output.write(',\n "users": {')
            users_written = 0
            for username, user_snapshot in snapshot["users"].items():
                if users_written:
                    output.write(",")
                output.write("\n ")
                json.dump(username, output)
                output.write(": ")
                _write_user_scoped_snapshot_value(output, user_snapshot, 4)
                users_written += 1
            # An empty "users" object is closed on the same line it was opened.
            output.write("\n }" if users_written else "}")
            output.write("\n}\n")
        # Measured once the file is closed so buffered output is included.
        disk_event["bytes"] = path.stat().st_size
|
|
785
|
+
|
|
786
|
+
|
|
787
|
+
def _read_snapshot_raw(path: Path, file_kind: str) -> dict[str, Any]:
    """Parse the JSON file at ``path`` and return it without any validation.

    Args:
        path: Snapshot file to read (decoded as UTF-8).
        file_kind: Label attached to the ``disk_io`` event for this read.

    Returns:
        Whatever ``json.load`` produced, cast to ``dict[str, Any]``; the cast
        is for the type checker only and performs no runtime check.
    """
    io_event = src.event(
        "disk_io",
        level="DEBUG",
        op="read",
        path=str(path),
        file_kind=file_kind,
    )
    with io_event as disk_event:
        # Recorded before parsing, so the size is logged even if parsing fails.
        disk_event["bytes"] = path.stat().st_size
        with path.open(encoding="utf-8") as snapshot_file:
            parsed = json.load(snapshot_file)
        return cast(dict[str, Any], parsed)
|
|
798
|
+
|
|
799
|
+
|
|
800
|
+
def _validate_snapshot_schema_version(path: Path, version: object) -> None:
    """Abort unless ``version`` equals ``SNAPSHOT_SCHEMA_VERSION``.

    Args:
        path: File the version was read from; used only in the error message.
        version: The ``schema_version`` value found in the file.

    Raises:
        SystemExit: If the version does not match the supported one.
    """
    if version != SNAPSHOT_SCHEMA_VERSION:
        message = (
            f"{path}: snapshot schema_version is {version!r}, "
            f"expected {SNAPSHOT_SCHEMA_VERSION}. Refusing to load."
        )
        raise SystemExit(message)
|
|
808
|
+
|
|
809
|
+
|
|
810
|
+
def _encode_full_snapshot_raw(path: Path, raw: dict[str, Any]) -> Snapshot:
    """Turn parsed full-snapshot JSON into the in-memory ``Snapshot`` shape.

    The schema version is validated, user-scoped snapshots are rejected, and
    every key of ``raw["repos"]`` is converted with ``int`` and re-encoded via
    ``id_codec.encode_repository_id``. ``raw`` is modified in place and
    returned.

    Raises:
        SystemExit: On a schema version mismatch, or when the file is a
            user-scoped snapshot.
    """
    _validate_snapshot_schema_version(path, raw.get("schema_version"))
    if raw.get("snapshot_kind") == USER_SCOPED_SNAPSHOT_KIND:
        raise SystemExit(f"{path}: snapshot_kind is 'user_scope', expected full repo snapshot.")

    on_disk_repos = cast(dict[str, RepoSnapshot], raw.get("repos", {}))
    encoded_repos: dict[str, RepoSnapshot] = {}
    for disk_repo_id, repo in on_disk_repos.items():
        encoded_repos[id_codec.encode_repository_id(int(disk_repo_id))] = repo
    raw["repos"] = encoded_repos
    return cast(Snapshot, raw)
|
|
819
|
+
|
|
820
|
+
|
|
821
|
+
def _encode_user_scoped_snapshot_raw(path: Path, raw: dict[str, Any]) -> UserScopedSnapshot:
    """Turn parsed user-scoped JSON into the in-memory ``UserScopedSnapshot`` shape.

    The schema version is validated and the file must be a user-scoped
    snapshot. Each user entry is rebuilt with only its ``"id"`` and
    ``"explicit_repositories"``; every repository ID is converted with ``int``
    and re-encoded via ``id_codec.encode_repository_id``. ``raw`` is modified
    in place and returned.

    Raises:
        SystemExit: On a schema version mismatch or an unexpected
            ``snapshot_kind``.
    """
    _validate_snapshot_schema_version(path, raw.get("schema_version"))
    kind = raw.get("snapshot_kind")
    if kind != USER_SCOPED_SNAPSHOT_KIND:
        raise SystemExit(f"{path}: snapshot_kind is {kind!r}, expected 'user_scope'.")

    on_disk_users = cast(dict[str, dict[str, Any]], raw.get("users", {}))
    encoded_users: dict[str, dict[str, Any]] = {}
    for username, user_snapshot in on_disk_users.items():
        user_id = user_snapshot["id"]
        repositories: list[dict[str, str]] = []
        for repo in cast(list[dict[str, Any]], user_snapshot["explicit_repositories"]):
            encoded_id = id_codec.encode_repository_id(int(repo["id"]))
            repositories.append({"id": encoded_id, "name": cast(str, repo["name"])})
        encoded_users[username] = {
            "id": user_id,
            "explicit_repositories": repositories,
        }
    raw["users"] = encoded_users
    return cast(UserScopedSnapshot, raw)
|
|
842
|
+
|
|
843
|
+
|
|
844
|
+
def read_snapshot_file(path: Path) -> Snapshot | UserScopedSnapshot:
    """Load a snapshot of either supported kind, parsing the file only once.

    The parsed ``snapshot_kind`` selects the encoder: user-scoped files go
    through ``_encode_user_scoped_snapshot_raw``, everything else through
    ``_encode_full_snapshot_raw``.
    """
    raw = _read_snapshot_raw(path, "snapshot")
    is_user_scoped = raw.get("snapshot_kind") == USER_SCOPED_SNAPSHOT_KIND
    if not is_user_scoped:
        return _encode_full_snapshot_raw(path, raw)
    return _encode_user_scoped_snapshot_raw(path, raw)
|
|
850
|
+
|
|
851
|
+
|
|
852
|
+
def read_snapshot(path: Path) -> Snapshot:
    """Load a full snapshot from ``path`` and validate its schema_version.

    Integer repo IDs stored on disk are re-encoded to opaque GraphQL Node IDs
    (`Repository:<int>` base64), so callers see the same shape that
    `build_snapshot` produces in memory.
    """
    raw = _read_snapshot_raw(path, "snapshot")
    return _encode_full_snapshot_raw(path, raw)
|
|
860
|
+
|
|
861
|
+
|
|
862
|
+
def read_user_scoped_snapshot(path: Path) -> UserScopedSnapshot:
    """Load a user-scoped snapshot from ``path`` and re-encode its repository IDs."""
    raw = _read_snapshot_raw(path, "user_scoped_snapshot")
    return _encode_user_scoped_snapshot_raw(path, raw)
|
|
868
|
+
|
|
869
|
+
|
|
870
|
+
class RepoDiff(TypedDict):
    """Per-repository difference, as produced by ``diff_snapshots``."""

    # Repository name; ``diff_snapshots`` prefers the post-state name.
    name: str
    # Entries present only on the "after" side.
    added: list[str]
    # Entries present only on the "before" side.
    removed: list[str]
|
|
874
|
+
|
|
875
|
+
|
|
876
|
+
@dataclass(frozen=True)
|
|
877
|
+
class _SnapshotDiffPlan:
|
|
878
|
+
changed_repo_ids: list[str]
|
|
879
|
+
grants_added: int
|
|
880
|
+
grants_removed: int
|
|
881
|
+
pending_added: list[str]
|
|
882
|
+
pending_removed: list[str]
|
|
883
|
+
|
|
884
|
+
|
|
885
|
+
def _sorted_usernames(values: Sequence[str]) -> Sequence[str]:
|
|
886
|
+
if all(values[index - 1] <= values[index] for index in range(1, len(values))):
|
|
887
|
+
return values
|
|
888
|
+
return sorted(values)
|
|
889
|
+
|
|
890
|
+
|
|
891
|
+
def _repo_usernames(repo: RepoSnapshot | None) -> Sequence[str]:
|
|
892
|
+
if repo is None:
|
|
893
|
+
return ()
|
|
894
|
+
return repo["explicit_permissions_users"]
|
|
895
|
+
|
|
896
|
+
|
|
897
|
+
def _sorted_username_diff_counts(
    before_usernames: Sequence[str],
    after_usernames: Sequence[str],
) -> tuple[int, int]:
    """Count how many usernames were added and removed between two lists.

    Both inputs are put in sorted order (via ``_sorted_usernames``) and walked
    in lockstep, so no intermediate sets or lists of names are built.

    Returns:
        ``(added, removed)``: the number of entries only in
        ``after_usernames`` and only in ``before_usernames`` respectively.
    """
    if before_usernames == after_usernames:
        return 0, 0

    old_names = _sorted_usernames(before_usernames)
    new_names = _sorted_usernames(after_usernames)
    old_total, new_total = len(old_names), len(new_names)
    old_pos = new_pos = 0
    added = removed = 0
    while old_pos < old_total and new_pos < new_total:
        old_name, new_name = old_names[old_pos], new_names[new_pos]
        if old_name < new_name:
            # Present before but not after.
            removed += 1
            old_pos += 1
        elif new_name < old_name:
            # Present after but not before.
            added += 1
            new_pos += 1
        else:
            old_pos += 1
            new_pos += 1
    # Whatever is left on either side has no counterpart on the other.
    return added + (new_total - new_pos), removed + (old_total - old_pos)
|
|
924
|
+
|
|
925
|
+
|
|
926
|
+
def _sorted_username_diff_values(
    before_usernames: Sequence[str],
    after_usernames: Sequence[str],
) -> tuple[list[str], list[str]]:
    """List the usernames added and removed between two lists.

    Both inputs are put in sorted order (via ``_sorted_usernames``) and walked
    in lockstep, so each returned list is itself in ascending order.

    Returns:
        ``(added, removed)``: the entries only in ``after_usernames`` and
        only in ``before_usernames`` respectively.
    """
    if before_usernames == after_usernames:
        return [], []

    old_names = _sorted_usernames(before_usernames)
    new_names = _sorted_usernames(after_usernames)
    old_total, new_total = len(old_names), len(new_names)
    old_pos = new_pos = 0
    added: list[str] = []
    removed: list[str] = []
    while old_pos < old_total and new_pos < new_total:
        old_name, new_name = old_names[old_pos], new_names[new_pos]
        if old_name < new_name:
            # Present before but not after.
            removed.append(old_name)
            old_pos += 1
        elif new_name < old_name:
            # Present after but not before.
            added.append(new_name)
            new_pos += 1
        else:
            old_pos += 1
            new_pos += 1
    # Whatever is left on either side has no counterpart on the other.
    removed.extend(old_names[old_pos:])
    added.extend(new_names[new_pos:])
    return added, removed
|
|
953
|
+
|
|
954
|
+
|
|
955
|
+
def diff_snapshots(
    before: dict[str, RepoSnapshot],
    after: dict[str, RepoSnapshot],
) -> dict[str, RepoDiff]:
    """Compute per-repo {added, removed} bindID lists.

    Repos present in only one side appear with the appropriate users
    in `added` (after-only) or `removed` (before-only). Repos with
    identical user lists on both sides are omitted entirely from the result.

    Args:
        before: Repo snapshots keyed by repo ID, pre-change.
        after: Repo snapshots keyed by repo ID, post-change.

    Returns:
        A dict keyed by repo ID. Keys follow the insertion order of `before`,
        then the repos that exist only in `after`, so the result order is the
        same on every run for the same inputs.
    """
    diff: dict[str, RepoDiff] = {}
    # Iterating a set union of string keys gives an order that changes between
    # interpreter runs (string hashes are randomised); walk the dicts instead.
    after_only_ids = [repo_id for repo_id in after if repo_id not in before]
    for repo_id in (*before, *after_only_ids):
        before_entry = before.get(repo_id)
        after_entry = after.get(repo_id)
        added, removed = _sorted_username_diff_values(
            _repo_usernames(before_entry),
            _repo_usernames(after_entry),
        )
        if not added and not removed:
            continue
        # prefer post-state name
        name = (after_entry or before_entry or {"name": "<unknown>"})["name"]
        diff[repo_id] = {
            "name": name,
            "added": added,
            "removed": removed,
        }
    return diff
|
|
983
|
+
|
|
984
|
+
|
|
985
|
+
def _snapshot_diff_repo_name(
|
|
986
|
+
before: Snapshot,
|
|
987
|
+
after_repo_for_id: Callable[[str], RepoSnapshot | None],
|
|
988
|
+
repo_id: str,
|
|
989
|
+
) -> str:
|
|
990
|
+
after_repo = after_repo_for_id(repo_id)
|
|
991
|
+
before_repo = before["repos"].get(repo_id)
|
|
992
|
+
return (after_repo or before_repo or {"name": "<unknown>"})["name"]
|
|
993
|
+
|
|
994
|
+
|
|
995
|
+
def _plan_snapshot_diff(
    before: Snapshot,
    after: Snapshot,
    repo_ids: Iterable[str],
    after_repo_for_id: Callable[[str], RepoSnapshot | None],
) -> _SnapshotDiffPlan:
    """Scan ``repo_ids`` once and summarise what changed between two snapshots.

    Only counts are computed per repo here; the added/removed username lists
    themselves are not built by this function.

    Args:
        before: Pre-change snapshot; its ``"repos"`` mapping is the "before" side.
        after: Post-change snapshot; only its ``"pending_bindIDs"`` is read here.
        repo_ids: Repo IDs to compare.
        after_repo_for_id: Lookup returning the post-change repo entry for an
            ID, or ``None`` when there is none.

    Returns:
        A plan holding the changed repo IDs sorted by display name (repo ID
        breaks ties, so same-named repos come out in a reproducible order), the
        grant totals, and the sorted pending-bindID differences.
    """
    changed_repo_ids: list[str] = []
    grants_added = 0
    grants_removed = 0
    for repo_id in repo_ids:
        before_repo = before["repos"].get(repo_id)
        after_repo = after_repo_for_id(repo_id)
        added_count, removed_count = _sorted_username_diff_counts(
            _repo_usernames(before_repo),
            _repo_usernames(after_repo),
        )
        if not added_count and not removed_count:
            continue
        changed_repo_ids.append(repo_id)
        grants_added += added_count
        grants_removed += removed_count

    # Callers commonly pass a set union as ``repo_ids``, whose iteration order
    # is arbitrary; sorting on the name alone would leave same-named repos in
    # that arbitrary order, so the repo ID is used as a secondary key.
    changed_repo_ids.sort(
        key=lambda repo_id: (
            _snapshot_diff_repo_name(before, after_repo_for_id, repo_id),
            repo_id,
        )
    )
    before_pending = set(before["pending_bindIDs"])
    after_pending = set(after["pending_bindIDs"])
    return _SnapshotDiffPlan(
        changed_repo_ids=changed_repo_ids,
        grants_added=grants_added,
        grants_removed=grants_removed,
        pending_added=sorted(after_pending - before_pending),
        pending_removed=sorted(before_pending - after_pending),
    )
|
|
1029
|
+
|
|
1030
|
+
|
|
1031
|
+
def _snapshot_diff_entry(
    before: Snapshot,
    after_repo_for_id: Callable[[str], RepoSnapshot | None],
    repo_id: str,
) -> RepositoryPermissionDiffEntry:
    """Build the full diff entry for one repository.

    The entry carries the decoded repo ID, the display name, the number of
    explicit-permission users on each side, and the added/removed username
    lists.
    """
    old_repo = before["repos"].get(repo_id)
    new_repo = after_repo_for_id(repo_id)
    added, removed = _sorted_username_diff_values(
        _repo_usernames(old_repo),
        _repo_usernames(new_repo),
    )
    decoded_id = id_codec.decode_repository_id(repo_id)
    display_name = _snapshot_diff_repo_name(before, after_repo_for_id, repo_id)
    old_count = _permission_count(old_repo)
    new_count = _permission_count(new_repo)
    return {
        "id": decoded_id,
        "name": display_name,
        "before_count": old_count,
        "after_count": new_count,
        "added": added,
        "removed": removed,
    }
|
|
1050
|
+
|
|
1051
|
+
|
|
1052
|
+
def _snapshot_diff_summary(plan: _SnapshotDiffPlan) -> SnapshotDiffSummary:
|
|
1053
|
+
return {
|
|
1054
|
+
"repos_changed": len(plan.changed_repo_ids),
|
|
1055
|
+
"grants_added": plan.grants_added,
|
|
1056
|
+
"grants_removed": plan.grants_removed,
|
|
1057
|
+
"pending_bindIDs_added": len(plan.pending_added),
|
|
1058
|
+
"pending_bindIDs_removed": len(plan.pending_removed),
|
|
1059
|
+
}
|
|
1060
|
+
|
|
1061
|
+
|
|
1062
|
+
def _snapshot_diff_pending_bind_ids(
|
|
1063
|
+
plan: _SnapshotDiffPlan,
|
|
1064
|
+
) -> SnapshotDiffPendingBindIDs:
|
|
1065
|
+
return {"added": plan.pending_added, "removed": plan.pending_removed}
|
|
1066
|
+
|
|
1067
|
+
|
|
1068
|
+
def build_snapshot_diff(before: Snapshot, after: Snapshot) -> SnapshotDiff:
    """Build a compact, JSON-serializable diff between two full snapshots.

    Every repo ID found in either snapshot is compared; the ``"repos"`` list
    of the result contains one entry per changed repo, in the plan's order.
    """
    after_repo_for_id = after["repos"].get
    all_repo_ids = set(before["repos"]) | set(after["repos"])
    plan = _plan_snapshot_diff(before, after, all_repo_ids, after_repo_for_id)

    repo_entries = []
    for repo_id in plan.changed_repo_ids:
        repo_entries.append(_snapshot_diff_entry(before, after_repo_for_id, repo_id))

    return {
        "schema_version": SNAPSHOT_DIFF_SCHEMA_VERSION,
        "diff_kind": "repo_permissions",
        "before": _snapshot_diff_side(before),
        "after": _snapshot_diff_side(after),
        "summary": _snapshot_diff_summary(plan),
        "pending_bindIDs": _snapshot_diff_pending_bind_ids(plan),
        "repos": repo_entries,
    }
|
|
1090
|
+
|
|
1091
|
+
|
|
1092
|
+
def _write_snapshot_diff_entry(
    output: TextIO,
    entry: RepositoryPermissionDiffEntry,
    indent: int,
) -> None:
    """Write one repo diff entry to ``output`` as a multi-line JSON object.

    Args:
        output: Text stream to write to.
        entry: The diff entry to serialise.
        indent: Column of the closing brace; fields are indented two further.

    No trailing newline is written after the closing brace.
    """
    field_indent = " " * (indent + 2)
    output.write("{\n")
    scalar_fields: tuple[tuple[str, object], ...] = (
        ("id", entry["id"]),
        ("name", entry["name"]),
        ("before_count", entry["before_count"]),
        ("after_count", entry["after_count"]),
    )
    # Every scalar field is followed by a comma because the two list fields
    # always come after them.
    for field_name, value in scalar_fields:
        output.write(f"{field_indent}{json.dumps(field_name)}: ")
        json.dump(value, output)
        output.write(",\n")

    output.write(f'{field_indent}"added": ')
    _write_string_list(output, entry["added"], indent + 2)
    output.write(f',\n{field_indent}"removed": ')
    _write_string_list(output, entry["removed"], indent + 2)
    output.write("\n" + " " * indent + "}")
|
|
1117
|
+
|
|
1118
|
+
|
|
1119
|
+
def _write_snapshot_diff_json(
    path: Path,
    before: Snapshot,
    after: Snapshot,
    plan: _SnapshotDiffPlan,
    after_repo_for_id: Callable[[str], RepoSnapshot | None],
) -> int:
    """Stream a full-snapshot diff to ``path`` and return the file size in bytes.

    The header fields are written through ``_write_top_level_json_field``;
    each changed repo's entry is then built and written one at a time, so the
    complete list of entries is never held in memory.
    """
    with path.open("w", encoding="utf-8") as output:
        output.write("{\n")
        header_fields: tuple[tuple[str, object], ...] = (
            ("schema_version", SNAPSHOT_DIFF_SCHEMA_VERSION),
            ("diff_kind", "repo_permissions"),
            ("before", _snapshot_diff_side(before)),
            ("after", _snapshot_diff_side(after)),
            ("summary", _snapshot_diff_summary(plan)),
            ("pending_bindIDs", _snapshot_diff_pending_bind_ids(plan)),
        )
        for position, (field_name, value) in enumerate(header_fields):
            _write_top_level_json_field(output, field_name, value, first=(position == 0))

        # NOTE(review): the indent literals below are reproduced exactly as
        # found; the entry writer is told to indent by 4 — confirm they line up.
        output.write(',\n "repos": [')
        entries_written = 0
        for repo_id in plan.changed_repo_ids:
            if entries_written:
                output.write(",")
            output.write("\n ")
            repo_entry = _snapshot_diff_entry(before, after_repo_for_id, repo_id)
            _write_snapshot_diff_entry(output, repo_entry, 4)
            entries_written += 1
        # An empty "repos" array is closed on the same line it was opened.
        output.write("\n ]" if entries_written else "]")
        output.write("\n}\n")
    # Measured once the file is closed so buffered output is included.
    return path.stat().st_size
|
|
1159
|
+
|
|
1160
|
+
|
|
1161
|
+
def write_snapshot_diff_from_snapshot_parts(
    path: Path,
    before: Snapshot,
    after: Snapshot,
    repo_ids: Iterable[str],
    after_repo_for_id: Callable[[str], RepoSnapshot | None],
) -> None:
    """Write a full-snapshot diff to ``path`` without materializing every repo diff.

    Args:
        path: Destination file; missing parent directories are created.
        before: Pre-change snapshot.
        after: Post-change snapshot.
        repo_ids: Repo IDs to compare.
        after_repo_for_id: Lookup returning the post-change repo entry for an
            ID, or ``None``.

    The plan is computed before the ``disk_io`` event is opened; the file size
    returned by ``_write_snapshot_diff_json`` is stored on the event under
    ``"bytes"``.
    """
    plan = _plan_snapshot_diff(before, after, repo_ids, after_repo_for_id)
    io_event = src.event(
        "disk_io",
        level="DEBUG",
        op="write",
        path=str(path),
        file_kind="snapshot_diff",
    )
    with io_event as disk_event:
        path.parent.mkdir(parents=True, exist_ok=True)
        written = _write_snapshot_diff_json(path, before, after, plan, after_repo_for_id)
        disk_event["bytes"] = written
|
|
1185
|
+
|
|
1186
|
+
|
|
1187
|
+
def write_snapshot_diff_from_snapshots(path: Path, before: Snapshot, after: Snapshot) -> None:
    """Write a compact diff between two full snapshots to ``path``.

    Compares every repo ID found in either snapshot, using ``after["repos"]``
    as the post-change lookup.
    """
    all_repo_ids = set(before["repos"]) | set(after["repos"])
    after_lookup = after["repos"].get
    write_snapshot_diff_from_snapshot_parts(path, before, after, all_repo_ids, after_lookup)
|
|
1196
|
+
|
|
1197
|
+
|
|
1198
|
+
def write_snapshot_diff(path: Path, diff: SnapshotDiff) -> None:
    """Write an already-built full-snapshot diff to ``path`` as pretty-printed JSON.

    Missing parent directories are created; the value returned by
    ``_write_pretty_json`` is stored on the ``disk_io`` event under ``"bytes"``.
    """
    io_event = src.event(
        "disk_io",
        level="DEBUG",
        op="write",
        path=str(path),
        file_kind="snapshot_diff",
    )
    with io_event as disk_event:
        path.parent.mkdir(parents=True, exist_ok=True)
        written = _write_pretty_json(path, diff)
        disk_event["bytes"] = written
|
|
1209
|
+
|
|
1210
|
+
|
|
1211
|
+
def build_user_scoped_snapshot_diff(
    before: UserScopedSnapshot,
    after: UserScopedSnapshot,
) -> UserScopedSnapshotDiff:
    """Build a compact, JSON-serializable diff between two user-scoped snapshots.

    Users are visited in username order. A user appears in the result only
    when their set of explicit repository IDs changed; added and removed
    repositories are listed in repository-name order.
    """
    before_users = before["users"]
    after_users = after["users"]
    changed_users: list[UserScopedSnapshotDiffEntry] = []
    grants_added = 0
    grants_removed = 0
    for username in sorted(set(before_users) | set(after_users)):
        before_user = before_users.get(username)
        after_user = after_users.get(username)
        before_repositories = _repositories_by_id(before_user)
        after_repositories = _repositories_by_id(after_user)
        before_ids = set(before_repositories)
        after_ids = set(after_repositories)
        added_ids = sorted(after_ids - before_ids, key=after_repositories.__getitem__)
        removed_ids = sorted(before_ids - after_ids, key=before_repositories.__getitem__)
        if not (added_ids or removed_ids):
            continue
        grants_added += len(added_ids)
        grants_removed += len(removed_ids)

        # The user ID is taken from the post-change entry when there is one.
        id_source = after_user if after_user is not None else before_user
        if id_source is None:
            continue

        added_repositories = [
            _snapshot_diff_repository(repo_id, after_repositories[repo_id])
            for repo_id in added_ids
        ]
        removed_repositories = [
            _snapshot_diff_repository(repo_id, before_repositories[repo_id])
            for repo_id in removed_ids
        ]
        changed_users.append(
            {
                "username": username,
                "id": id_source["id"],
                "before_count": len(before_repositories),
                "after_count": len(after_repositories),
                "added_repositories": added_repositories,
                "removed_repositories": removed_repositories,
            }
        )

    return {
        "schema_version": SNAPSHOT_DIFF_SCHEMA_VERSION,
        "diff_kind": "user_scoped_permissions",
        "before": _snapshot_diff_side(before),
        "after": _snapshot_diff_side(after),
        "summary": {
            "users_changed": len(changed_users),
            "grants_added": grants_added,
            "grants_removed": grants_removed,
        },
        "users": changed_users,
    }
|
|
1269
|
+
|
|
1270
|
+
|
|
1271
|
+
def write_user_scoped_snapshot_diff(path: Path, diff: UserScopedSnapshotDiff) -> None:
    """Write an already-built user-scoped diff to ``path`` as pretty-printed JSON.

    Missing parent directories are created; the value returned by
    ``_write_pretty_json`` is stored on the ``disk_io`` event under ``"bytes"``.
    """
    io_event = src.event(
        "disk_io",
        level="DEBUG",
        op="write",
        path=str(path),
        file_kind="user_scoped_snapshot_diff",
    )
    with io_event as disk_event:
        path.parent.mkdir(parents=True, exist_ok=True)
        written = _write_pretty_json(path, diff)
        disk_event["bytes"] = written
|
|
1282
|
+
|
|
1283
|
+
|
|
1284
|
+
# Default cap on how many repos (or users) a rendered text diff lists.
MAX_RENDERED_DIFF_ENTRIES = 50
# Default cap on how many names are shown per added/removed line.
MAX_RENDERED_DIFF_VALUES = 50
|
|
1286
|
+
|
|
1287
|
+
|
|
1288
|
+
def _render_limited_values(values: list[str], max_values: int) -> str:
|
|
1289
|
+
if len(values) <= max_values:
|
|
1290
|
+
return ", ".join(values)
|
|
1291
|
+
visible_values = values[:max_values]
|
|
1292
|
+
omitted_count = len(values) - max_values
|
|
1293
|
+
return f"{', '.join(visible_values)}, ... ({omitted_count} more)"
|
|
1294
|
+
|
|
1295
|
+
|
|
1296
|
+
def render_diff(
    diff: dict[str, RepoDiff],
    max_repos: int = MAX_RENDERED_DIFF_ENTRIES,
    max_usernames_per_section: int = MAX_RENDERED_DIFF_VALUES,
) -> str:
    """Render a per-repo diff dict as human-readable, multi-line text.

    Args:
        diff: Mapping of repo ID to its diff, as built by ``diff_snapshots``.
        max_repos: Maximum number of repos to list (in repo-name order).
        max_usernames_per_section: Maximum usernames shown per added/removed line.

    Returns:
        ``"No changes."`` for an empty diff; otherwise one section per listed
        repo, a note about omitted repos if any, and a summary line whose
        totals cover the whole diff, not just the listed repos.
    """
    if not diff:
        return "No changes."

    by_name = sorted(diff.items(), key=lambda item: item[1]["name"])
    lines: list[str] = []
    for repo_id, repo_diff in by_name[:max_repos]:
        lines.append(f"=== {repo_diff['name']} (id={id_codec.decode_repository_id(repo_id)}) ===")
        added = repo_diff["added"]
        if added:
            shown = _render_limited_values(added, max_usernames_per_section)
            lines.append(f" + added ({len(added)}): {shown}")
        removed = repo_diff["removed"]
        if removed:
            shown = _render_limited_values(removed, max_usernames_per_section)
            lines.append(f" - removed ({len(removed)}): {shown}")

    hidden_repos = len(by_name) - max_repos
    if hidden_repos > 0:
        lines.append(
            f"... {hidden_repos} more repo(s) omitted from log output; "
            "see diff.json for full added/removed lists."
        )

    total_added = sum(len(repo_diff["added"]) for repo_diff in diff.values())
    total_removed = sum(len(repo_diff["removed"]) for repo_diff in diff.values())
    lines.append("")
    lines.append(
        f"Summary: {len(diff)} repo(s) changed; "
        f"{total_added} grant(s) added, {total_removed} grant(s) removed."
    )
    return "\n".join(lines)
|
|
1342
|
+
|
|
1343
|
+
|
|
1344
|
+
def render_snapshot_diff_from_snapshot_parts(
    before: Snapshot,
    after: Snapshot,
    repo_ids: Iterable[str],
    after_repo_for_id: Callable[[str], RepoSnapshot | None],
    max_repos: int = MAX_RENDERED_DIFF_ENTRIES,
    max_usernames_per_section: int = MAX_RENDERED_DIFF_VALUES,
) -> str:
    """Render a capped, human-readable diff without materializing the full diff.

    Full added/removed lists are built only for the repos that are actually
    listed; the summary totals come from the plan and cover every changed repo.

    Args:
        before: Pre-change snapshot.
        after: Post-change snapshot.
        repo_ids: Repo IDs to compare.
        after_repo_for_id: Lookup returning the post-change repo entry for an
            ID, or ``None``.
        max_repos: Maximum number of repos to list.
        max_usernames_per_section: Maximum usernames shown per added/removed line.

    Returns:
        ``"No changes."`` when no repo changed, otherwise the rendered text.
    """
    plan = _plan_snapshot_diff(before, after, repo_ids, after_repo_for_id)
    changed_ids = plan.changed_repo_ids
    if not changed_ids:
        return "No changes."

    lines: list[str] = []
    for repo_id in changed_ids[:max_repos]:
        entry = _snapshot_diff_entry(before, after_repo_for_id, repo_id)
        lines.append(f"=== {entry['name']} (id={entry['id']}) ===")
        added = entry["added"]
        if added:
            shown = _render_limited_values(added, max_usernames_per_section)
            lines.append(f" + added ({len(added)}): {shown}")
        removed = entry["removed"]
        if removed:
            shown = _render_limited_values(removed, max_usernames_per_section)
            lines.append(f" - removed ({len(removed)}): {shown}")

    hidden_repos = len(changed_ids) - max_repos
    if hidden_repos > 0:
        lines.append(
            f"... {hidden_repos} more repo(s) omitted from log output; "
            "see diff.json for full added/removed lists."
        )
    lines.append("")
    lines.append(
        f"Summary: {len(changed_ids)} repo(s) changed; "
        f"{plan.grants_added} grant(s) added, {plan.grants_removed} grant(s) removed."
    )
    return "\n".join(lines)
|
|
1393
|
+
|
|
1394
|
+
|
|
1395
|
+
def render_snapshot_diff(
    before: Snapshot,
    after: Snapshot,
    max_repos: int = MAX_RENDERED_DIFF_ENTRIES,
    max_usernames_per_section: int = MAX_RENDERED_DIFF_VALUES,
) -> str:
    """Render a capped, human-readable diff between two full snapshots.

    Compares every repo ID found in either snapshot, using ``after["repos"]``
    as the post-change lookup.
    """
    all_repo_ids = set(before["repos"]) | set(after["repos"])
    after_lookup = after["repos"].get
    return render_snapshot_diff_from_snapshot_parts(
        before,
        after,
        all_repo_ids,
        after_lookup,
        max_repos,
        max_usernames_per_section,
    )
|
|
1410
|
+
|
|
1411
|
+
|
|
1412
|
+
def render_user_scoped_diff(
    before: UserScopedSnapshot,
    after: UserScopedSnapshot,
    max_users: int = MAX_RENDERED_DIFF_ENTRIES,
    max_repositories_per_section: int = MAX_RENDERED_DIFF_VALUES,
) -> str:
    """Format a user-scoped snapshot diff as human-readable text.

    Args:
        before: Pre-change user-scoped snapshot.
        after: Post-change user-scoped snapshot.
        max_users: Maximum number of changed users to list (in username order).
        max_repositories_per_section: Maximum repository names shown per
            added/removed line.

    Returns:
        ``"No changes."`` when no user's repository set changed. Otherwise one
        section per listed user, a note about omitted users if any, and a
        summary line whose totals cover every changed user.
    """
    lines: list[str] = []
    total_added = 0
    total_removed = 0
    changed_users = 0
    for username in sorted(set(before["users"]) | set(after["users"])):
        before_repositories = _repositories_by_id(before["users"].get(username))
        after_repositories = _repositories_by_id(after["users"].get(username))
        before_ids = set(before_repositories)
        after_ids = set(after_repositories)
        added_ids = sorted(after_ids - before_ids, key=lambda repo_id: after_repositories[repo_id])
        removed_ids = sorted(
            before_ids - after_ids,
            key=lambda repo_id: before_repositories[repo_id],
        )
        if not added_ids and not removed_ids:
            continue
        changed_users += 1
        total_added += len(added_ids)
        total_removed += len(removed_ids)
        if changed_users > max_users:
            # Past the display cap: keep counting, but stop listing.
            continue
        lines.append(f"=== {username} ===")
        if added_ids:
            lines.append(
                " + added ({count}): {repos}".format(
                    count=len(added_ids),
                    repos=_render_limited_values(
                        [after_repositories[repo_id] for repo_id in added_ids],
                        max_repositories_per_section,
                    ),
                )
            )
        if removed_ids:
            lines.append(
                " - removed ({count}): {repos}".format(
                    count=len(removed_ids),
                    repos=_render_limited_values(
                        [before_repositories[repo_id] for repo_id in removed_ids],
                        max_repositories_per_section,
                    ),
                )
            )
    # Decide from the change count, not from ``lines``: with ``max_users <= 0``
    # nothing is listed even though users changed, and that must not be
    # reported as "No changes.".
    if changed_users == 0:
        return "No changes."
    omitted_users = changed_users - max_users
    if omitted_users > 0:
        lines.append(
            f"... {omitted_users} more user(s) omitted from log output; "
            "see diff.json for full added/removed lists."
        )
    lines.append("")
    lines.append(f"Summary: {total_added} grant(s) added, {total_removed} grant(s) removed.")
    return "\n".join(lines)
|
|
1472
|
+
|
|
1473
|
+
|
|
1474
|
+
def _repositories_by_id(
|
|
1475
|
+
user_snapshot: UserScopedUserSnapshot | None,
|
|
1476
|
+
) -> dict[str, str]:
|
|
1477
|
+
if user_snapshot is None:
|
|
1478
|
+
return {}
|
|
1479
|
+
return {
|
|
1480
|
+
repository["id"]: repository["name"]
|
|
1481
|
+
for repository in user_snapshot["explicit_repositories"]
|
|
1482
|
+
}
|
|
1483
|
+
|
|
1484
|
+
|
|
1485
|
+
def _permission_count(repo_snapshot: RepoSnapshot | None) -> int:
|
|
1486
|
+
if repo_snapshot is None:
|
|
1487
|
+
return 0
|
|
1488
|
+
return len(repo_snapshot["explicit_permissions_users"])
|
|
1489
|
+
|
|
1490
|
+
|
|
1491
|
+
def _snapshot_diff_side(snapshot: Snapshot | UserScopedSnapshot) -> SnapshotDiffSide:
|
|
1492
|
+
return {
|
|
1493
|
+
"captured_at": snapshot["captured_at"],
|
|
1494
|
+
"endpoint": snapshot["endpoint"],
|
|
1495
|
+
"bindID_mode": snapshot["bindID_mode"],
|
|
1496
|
+
"config_file": snapshot["config_file"],
|
|
1497
|
+
"config_sha256": snapshot["config_sha256"],
|
|
1498
|
+
}
|
|
1499
|
+
|
|
1500
|
+
|
|
1501
|
+
def _snapshot_diff_repository(repo_id: str, repo_name: str) -> SnapshotDiffRepository:
    """Build the ``{"id", "name"}`` record for a repository in a user-scoped diff.

    The ID is passed through ``id_codec.decode_repository_id`` first.
    """
    decoded_id = id_codec.decode_repository_id(repo_id)
    return {"id": decoded_id, "name": repo_name}
|