src-auth-perms-sync 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- src_auth_perms_sync/__init__.py +1 -0
- src_auth_perms_sync/__main__.py +6 -0
- src_auth_perms_sync/cli.py +646 -0
- src_auth_perms_sync/orgs/__init__.py +1 -0
- src_auth_perms_sync/orgs/command.py +7 -0
- src_auth_perms_sync/orgs/queries.py +44 -0
- src_auth_perms_sync/orgs/sync.py +1167 -0
- src_auth_perms_sync/orgs/types.py +103 -0
- src_auth_perms_sync/permissions/__init__.py +1 -0
- src_auth_perms_sync/permissions/apply.py +420 -0
- src_auth_perms_sync/permissions/command.py +918 -0
- src_auth_perms_sync/permissions/full_set.py +880 -0
- src_auth_perms_sync/permissions/mapping.py +627 -0
- src_auth_perms_sync/permissions/maps.py +291 -0
- src_auth_perms_sync/permissions/queries.py +180 -0
- src_auth_perms_sync/permissions/restore.py +913 -0
- src_auth_perms_sync/permissions/snapshot.py +1502 -0
- src_auth_perms_sync/permissions/sourcegraph.py +392 -0
- src_auth_perms_sync/permissions/types.py +116 -0
- src_auth_perms_sync/permissions/workflow.py +526 -0
- src_auth_perms_sync/shared/__init__.py +1 -0
- src_auth_perms_sync/shared/backups.py +119 -0
- src_auth_perms_sync/shared/id_codec.py +67 -0
- src_auth_perms_sync/shared/queries.py +65 -0
- src_auth_perms_sync/shared/run_context.py +34 -0
- src_auth_perms_sync/shared/saml_groups.py +267 -0
- src_auth_perms_sync/shared/site_config.py +366 -0
- src_auth_perms_sync/shared/sourcegraph.py +69 -0
- src_auth_perms_sync/shared/types.py +69 -0
- src_auth_perms_sync-0.2.1.dist-info/METADATA +256 -0
- src_auth_perms_sync-0.2.1.dist-info/RECORD +34 -0
- src_auth_perms_sync-0.2.1.dist-info/WHEEL +4 -0
- src_auth_perms_sync-0.2.1.dist-info/entry_points.txt +2 -0
- src_auth_perms_sync-0.2.1.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,526 @@
|
|
|
1
|
+
"""Shared helpers for repo permission command workflows."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import datetime
|
|
6
|
+
import logging
|
|
7
|
+
from collections.abc import Iterator
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
import src_py_lib as src
|
|
11
|
+
|
|
12
|
+
from ..shared import backups, id_codec, saml_groups
|
|
13
|
+
from ..shared import sourcegraph as shared_sourcegraph
|
|
14
|
+
from ..shared import types as shared_types
|
|
15
|
+
from . import mapping as permissions_mapping
|
|
16
|
+
from . import maps as permissions_maps
|
|
17
|
+
from . import snapshot as permission_snapshot
|
|
18
|
+
from . import sourcegraph as permissions_sourcegraph
|
|
19
|
+
from . import types as permission_types
|
|
20
|
+
|
|
21
|
+
# Module-level logger named after this module; the workflow helpers here report through it.
log = logging.getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def load_discovery(
    client: src.SourcegraphClient,
    saml_groups_attribute_name_by_config_id: dict[str, str],
) -> tuple[
    list[shared_types.AuthProvider],
    list[permission_types.ExternalService],
    dict[tuple[str, str], str],
]:
    """Gather the instance state that --get and --set both start from.

    Lists the auth providers, lists the external services, and resolves the
    per-provider SAML attribute-name map, logging each step the same way for
    every caller. Keeping this in one place stops the two commands from
    drifting apart in which providers/services they treat as authoritative
    or in how the SAML attribute override map is resolved.

    Args:
        client: Sourcegraph client used for both listing calls.
        saml_groups_attribute_name_by_config_id: Attribute-name overrides
            passed through to the SAML attribute-name resolver.

    Returns:
        ``(providers, services, saml_attribute_names)``. The two lists are
        returned raw so each caller can reshape them (YAML form for --get,
        keyed-by-id dict for --set).
    """
    log.info("Querying auth providers from %s ...", client.endpoint)
    auth_providers = shared_sourcegraph.list_auth_providers(client)
    log.info("Received %d auth providers.", len(auth_providers))

    log.info("Loading external services from %s ...", client.endpoint)
    external_services = permissions_sourcegraph.list_external_services(client)
    log.info("Received %d external services.", len(external_services))

    attribute_names_by_provider = saml_groups.attribute_names_by_provider_key(
        auth_providers,
        saml_groups_attribute_name_by_config_id,
    )
    return auth_providers, external_services, attribute_names_by_provider
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def load_repos_by_external_service(
    client: src.SourcegraphClient,
    services_by_id: dict[int, permission_types.ExternalService],
) -> dict[int, list[permission_types.Repository]]:
    """List the repos behind each code host connection, one listing call per connection.

    Connections are visited in ascending key order. The summed ``repoCount``
    values are logged up front as an estimate; the number of repos actually
    received is recorded on the surrounding event once every connection has
    been listed.

    Args:
        client: Sourcegraph client used for the repo listing calls.
        services_by_id: Code host connections keyed by their integer id.

    Returns:
        The fetched repos grouped under the same keys as ``services_by_id``.
    """
    connection_count = len(services_by_id)
    with src.event(
        "load_repos_by_external_service",
        external_service_count=connection_count,
    ) as load_event:
        estimated_total = sum(entry["repoCount"] for entry in services_by_id.values())
        load_event["expected_repo_count"] = estimated_total
        log.info(
            "Loading about %d repo(s) across %d code host connection(s) ...",
            estimated_total,
            connection_count,
        )

        grouped: dict[int, list[permission_types.Repository]] = {}
        for numeric_id in sorted(services_by_id):
            connection = services_by_id[numeric_id]
            # The listing call takes the connection's own "id" field, while the
            # result is filed (and logged) under the integer dict key.
            fetched = permissions_sourcegraph.list_repos_for_external_service(
                client, connection["id"]
            )
            grouped[numeric_id] = fetched
            log.info(
                "Received %d repo(s) for code host connection %s (id=%d).",
                len(fetched),
                connection["displayName"],
                numeric_id,
            )
        load_event["repo_count"] = sum(len(fetched) for fetched in grouped.values())
        return grouped
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def index_repos_by_id(
    repos_by_external_service_id: dict[int, list[permission_types.Repository]],
) -> dict[str, permission_types.Repository]:
    """Flatten the per-connection repo lists into one dict keyed by repo ``id``.

    When the same repo id appears under more than one connection, the entry
    seen last (in the input dict's iteration order) is the one kept.
    """
    return {
        repo["id"]: repo
        for connection_repos in repos_by_external_service_id.values()
        for repo in connection_repos
    }
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def load_mapping_rules(input_path: Path) -> list[permission_types.MappingRule]:
    """Read the ``maps`` list from the maps YAML and validate its structure.

    A missing or empty ``maps`` entry yields an empty list, and validation is
    skipped in that case.
    """
    rules = permissions_maps.load_maps_yaml(input_path).get("maps") or []
    if not rules:
        return rules
    permissions_mapping.validate_mapping_rules(rules)
    return rules
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def load_mapping_context(
    client: src.SourcegraphClient,
    input_path: Path,
    saml_groups_attribute_name_by_config_id: dict[str, str],
) -> permission_types.MappingContext | None:
    """Build the permission-planning context from a maps file.

    Loads the mapping rules from ``input_path`` and, when there are any,
    loads the providers, services, and repos that go with them.

    Returns:
        The mapping context, or ``None`` (after logging a warning) when the
        maps file defines no rules.
    """
    rules = load_mapping_rules(input_path)
    if rules:
        return load_mapping_context_for_rules(
            client,
            rules,
            saml_groups_attribute_name_by_config_id,
        )

    log.warning("No maps defined in %s — nothing to do.", input_path)
    return None
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def load_mapping_context_for_rules(
    client: src.SourcegraphClient,
    mapping_rules: list[permission_types.MappingRule],
    saml_groups_attribute_name_by_config_id: dict[str, str],
) -> permission_types.MappingContext:
    """Assemble the mapping context for rules that are already loaded.

    Runs discovery (providers, services, SAML attribute names), keys the
    services by their decoded id, fetches the repos for every service, and
    warns about any service ids the rules reference that the instance does
    not have.
    """
    auth_providers, external_services, attribute_names = load_discovery(
        client,
        saml_groups_attribute_name_by_config_id,
    )

    # Key each service by the integer decoded from its "id" field.
    keyed_services: dict[int, permission_types.ExternalService] = {}
    for external_service in external_services:
        decoded_id = id_codec.decode_external_service_id(external_service["id"])
        keyed_services[decoded_id] = external_service

    repos_per_service = load_repos_by_external_service(client, keyed_services)
    repos_index = index_repos_by_id(repos_per_service)
    log.info(
        "Received %d unique repo(s) across %d code host connection(s).",
        len(repos_index),
        len(keyed_services),
    )

    warn_unknown_external_services(mapping_rules, keyed_services)
    return permission_types.MappingContext(
        mapping_rules=mapping_rules,
        providers=auth_providers,
        saml_groups_attribute_names=attribute_names,
        services_by_id=keyed_services,
        repos_by_external_service_id=repos_per_service,
        all_repos_by_id=repos_index,
    )
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def warn_unknown_external_services(
    mapping_rules: list[permission_types.MappingRule],
    services_by_id: dict[int, permission_types.ExternalService],
) -> None:
    """Log a warning for every service id the maps use that the instance lacks.

    Ids are reported in sorted order, one warning per missing id.
    """
    referenced_ids = permissions_mapping.referenced_external_service_ids(mapping_rules)
    missing_ids = [
        referenced_id
        for referenced_id in sorted(referenced_ids)
        if referenced_id not in services_by_id
    ]
    for missing_id in missing_ids:
        log.warning(
            "External service id %s is referenced by the maps but "
            "is not present on the instance — rules using it will "
            "resolve to zero repos.",
            missing_id,
        )
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def snapshot_path(
    input_path: Path,
    timestamp: str,
    endpoint: str,
    command: str,
    state: str | None = None,
) -> Path:
    """Build an artifact path for this run from the maps file name and run details.

    Only the file name of ``input_path`` is forwarded; the directory layout
    is decided by ``backups.backup_path``.

    Example: maps.yaml + endpoint + timestamp + set-apply + before →
    src-auth-perms-sync-runs/sourcegraph.example.com/runs/2026-04-27-01-54-23-set-apply/before.json.
    """
    source_name = input_path.name
    return backups.backup_path(source_name, timestamp, endpoint, command, state)
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def write_snapshot_pair(
    input_path: Path,
    timestamp: str,
    endpoint: str,
    command: str,
    before_snapshot: permission_snapshot.Snapshot,
    after_snapshot: permission_snapshot.Snapshot,
) -> tuple[Path, Path, Path]:
    """Write the before snapshot, the after snapshot, and their diff for one run.

    Returns:
        ``(before_path, after_path, diff_path)`` for the three files written.
    """
    run_details = (input_path, timestamp, endpoint, command)

    before_path = snapshot_path(*run_details, "before")
    after_path = snapshot_path(*run_details, "after")

    permission_snapshot.write_snapshot(before_path, before_snapshot)
    permission_snapshot.write_snapshot(after_path, after_snapshot)

    diff_path = write_snapshot_diff_file(*run_details, before_snapshot, after_snapshot)
    return before_path, after_path, diff_path
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def write_snapshot_diff_file(
    input_path: Path,
    timestamp: str,
    endpoint: str,
    command: str,
    before_snapshot: permission_snapshot.Snapshot,
    after_snapshot: permission_snapshot.Snapshot,
) -> Path:
    """Write the diff between two snapshots to this run's "diff" artifact.

    Returns:
        The path the diff was written to.
    """
    destination = snapshot_path(input_path, timestamp, endpoint, command, "diff")
    permission_snapshot.write_snapshot_diff_from_snapshots(
        destination, before_snapshot, after_snapshot
    )
    return destination
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
def write_user_scoped_snapshot_diff_file(
    input_path: Path,
    timestamp: str,
    endpoint: str,
    command: str,
    before_snapshot: permission_snapshot.UserScopedSnapshot,
    after_snapshot: permission_snapshot.UserScopedSnapshot,
) -> Path:
    """Write the diff between two user-scoped snapshots to this run's "diff" artifact.

    Returns:
        The path the diff was written to.
    """
    destination = snapshot_path(input_path, timestamp, endpoint, command, "diff")
    user_scoped_diff = permission_snapshot.build_user_scoped_snapshot_diff(
        before_snapshot, after_snapshot
    )
    permission_snapshot.write_user_scoped_snapshot_diff(destination, user_scoped_diff)
    return destination
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
def maps_backup_path(
    input_path: Path,
    timestamp: str,
    endpoint: str,
    command: str,
) -> Path:
    """Return where the companion copy of the maps YAML goes for a backup run.

    No ``state`` is passed, so the resulting path is derived from the maps
    file's own name.
    """
    source_name = input_path.name
    return backups.backup_path(source_name, timestamp, endpoint, command, suffix="yaml")
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
def write_maps_backup(
    input_path: Path,
    timestamp: str,
    endpoint: str,
    command: str,
) -> Path | None:
    """Copy the active maps YAML next to the JSON snapshots for auditability.

    The copy is byte-for-byte, the destination's parent directories are
    created as needed, and the write is wrapped in a DEBUG ``disk_io`` event
    that records the number of bytes copied.

    Returns:
        The backup path, or ``None`` (after logging a warning) when
        ``input_path`` does not exist.
    """
    if input_path.exists():
        destination = maps_backup_path(input_path, timestamp, endpoint, command)
        with src.event(
            "disk_io",
            level="DEBUG",
            op="write",
            path=str(destination),
            file_kind="yaml",
        ) as disk_event:
            payload = input_path.read_bytes()
            destination.parent.mkdir(parents=True, exist_ok=True)
            destination.write_bytes(payload)
            disk_event["bytes"] = len(payload)
        log.info("Wrote maps backup: %s", destination)
        return destination

    log.warning("Could not back up maps file %s because it does not exist.", input_path)
    return None
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
def projected_snapshot_repo_ids(
    before_snapshot: permission_snapshot.Snapshot,
    expected_users: dict[str, tuple[str, ...]],
) -> list[str]:
    """List every repo ID that could show up in the projected after snapshot.

    That is the sorted, de-duplicated union of the repo IDs already in the
    before snapshot and the repo IDs that have an ``expected_users`` entry.
    """
    candidate_ids = {*before_snapshot["repos"], *expected_users}
    return sorted(candidate_ids)
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
def projected_snapshot_repo_for_id(
    before_snapshot: permission_snapshot.Snapshot,
    expected_users: dict[str, tuple[str, ...]],
    repo_names: dict[str, str],
    repo_id: str,
) -> permission_snapshot.RepoSnapshot | None:
    """Project a single repo's entry without copying the whole snapshot.

    Returns:
        * the before snapshot's entry (or ``None`` if it has none) when the
          repo has no ``expected_users`` entry;
        * ``None`` when the repo's expected user list is empty;
        * otherwise a fresh entry built from ``repo_names[repo_id]`` and the
          expected usernames.
    """
    if repo_id not in expected_users:
        # Not being rewritten: the projection is whatever was there before.
        return before_snapshot["repos"].get(repo_id)

    projected_usernames = expected_users[repo_id]
    if projected_usernames:
        return {
            "name": repo_names[repo_id],
            "explicit_permissions_users": list(projected_usernames),
        }
    return None
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
def projected_snapshot_repos(
    before_snapshot: permission_snapshot.Snapshot,
    expected_users: dict[str, tuple[str, ...]],
    repo_names: dict[str, str],
) -> Iterator[tuple[str, permission_snapshot.RepoSnapshot]]:
    """Yield ``(repo_id, projected entry)`` pairs lazily, in sorted repo-ID order.

    Repos whose projection is ``None`` are left out.
    """
    for candidate_id in projected_snapshot_repo_ids(before_snapshot, expected_users):
        projected = projected_snapshot_repo_for_id(
            before_snapshot, expected_users, repo_names, candidate_id
        )
        if projected is None:
            continue
        yield candidate_id, projected
|
|
323
|
+
|
|
324
|
+
|
|
325
|
+
def projected_snapshot_stats(
    before_snapshot: permission_snapshot.Snapshot,
    expected_users: dict[str, tuple[str, ...]],
) -> permission_snapshot.SnapshotStats:
    """Work out the projected stats without building the projected snapshot.

    A repo contributes to the totals when it either stays untouched (it is in
    the before snapshot and has no ``expected_users`` entry) or is rewritten
    to a non-empty user list. ``total_users_scanned`` is carried over from the
    before snapshot unchanged.
    """
    # User lists of repos that keep their before-snapshot state.
    untouched_user_lists = [
        entry["explicit_permissions_users"]
        for repo_id, entry in before_snapshot["repos"].items()
        if repo_id not in expected_users
    ]
    # User lists of rewritten repos; an empty list means the repo drops out.
    rewritten_user_lists = [users for users in expected_users.values() if users]

    granted_users: set[str] = set()
    grant_total = 0
    for users in [*untouched_user_lists, *rewritten_user_lists]:
        granted_users.update(users)
        grant_total += len(users)

    return {
        "total_users_scanned": before_snapshot["stats"]["total_users_scanned"],
        "users_with_explicit_grants": len(granted_users),
        "repos_with_explicit_grants": len(untouched_user_lists) + len(rewritten_user_lists),
        "total_grants": grant_total,
    }
|
|
352
|
+
|
|
353
|
+
|
|
354
|
+
def projected_snapshot_shell(
    before_snapshot: permission_snapshot.Snapshot,
    expected_users: dict[str, tuple[str, ...]],
) -> permission_snapshot.Snapshot:
    """Build the projected snapshot's metadata with an empty ``repos`` mapping.

    Everything except ``captured_at`` (the current UTC time, to the second),
    ``stats`` (recomputed for the projection), and ``repos`` (left empty so
    repo entries can be streamed separately) is copied from the before
    snapshot; ``pending_bindIDs`` is copied into a new list.
    """
    captured_at = datetime.datetime.now(tz=datetime.UTC).isoformat(timespec="seconds")
    projected_stats = projected_snapshot_stats(before_snapshot, expected_users)
    pending_bind_ids = list(before_snapshot["pending_bindIDs"])
    return {
        "schema_version": before_snapshot["schema_version"],
        "captured_at": captured_at,
        "endpoint": before_snapshot["endpoint"],
        "bindID_mode": before_snapshot["bindID_mode"],
        "config_file": before_snapshot["config_file"],
        "config_sha256": before_snapshot["config_sha256"],
        "pending_bindIDs": pending_bind_ids,
        "stats": projected_stats,
        "repos": {},
    }
|
|
370
|
+
|
|
371
|
+
|
|
372
|
+
def write_projected_snapshot(
    path: Path,
    before_snapshot: permission_snapshot.Snapshot,
    expected_users: dict[str, tuple[str, ...]],
    repo_names: dict[str, str],
) -> permission_snapshot.Snapshot:
    """Write the projected full-set after snapshot to ``path``.

    The snapshot metadata is built up front and the repo entries are handed
    to the writer as a lazy iterator instead of a fully built mapping.

    Returns:
        The metadata shell that was written; its ``repos`` mapping is empty.
    """
    shell = projected_snapshot_shell(before_snapshot, expected_users)
    repo_entries = projected_snapshot_repos(before_snapshot, expected_users, repo_names)
    permission_snapshot.write_snapshot_with_repos(path, shell, repo_entries)
    return shell
|
|
386
|
+
|
|
387
|
+
|
|
388
|
+
def write_projected_snapshot_diff_file(
    input_path: Path,
    timestamp: str,
    endpoint: str,
    command: str,
    before_snapshot: permission_snapshot.Snapshot,
    after_snapshot: permission_snapshot.Snapshot,
    expected_users: dict[str, tuple[str, ...]],
    repo_names: dict[str, str],
) -> Path:
    """Write the diff against a projected after snapshot to this run's "diff" artifact.

    The after side's repo entries are not read from ``after_snapshot``; they
    are looked up one repo at a time through a callback that projects each
    entry on demand.

    Returns:
        The path the diff was written to.
    """

    def projected_repo(repo_id: str) -> permission_snapshot.RepoSnapshot | None:
        # Per-repo lookup handed to the diff writer.
        return projected_snapshot_repo_for_id(
            before_snapshot, expected_users, repo_names, repo_id
        )

    destination = snapshot_path(input_path, timestamp, endpoint, command, "diff")
    candidate_ids = projected_snapshot_repo_ids(before_snapshot, expected_users)
    permission_snapshot.write_snapshot_diff_from_snapshot_parts(
        destination,
        before_snapshot,
        after_snapshot,
        candidate_ids,
        projected_repo,
    )
    return destination
|
|
414
|
+
|
|
415
|
+
|
|
416
|
+
def render_projected_snapshot_diff(
    before_snapshot: permission_snapshot.Snapshot,
    after_snapshot: permission_snapshot.Snapshot,
    expected_users: dict[str, tuple[str, ...]],
    repo_names: dict[str, str],
) -> str:
    """Render, as text, the capped diff against a projected after snapshot.

    The after side's repo entries are supplied through a per-repo callback
    that projects each entry on demand.
    """

    def projected_repo(repo_id: str) -> permission_snapshot.RepoSnapshot | None:
        # Per-repo lookup handed to the diff renderer.
        return projected_snapshot_repo_for_id(
            before_snapshot, expected_users, repo_names, repo_id
        )

    candidate_ids = projected_snapshot_repo_ids(before_snapshot, expected_users)
    return permission_snapshot.render_snapshot_diff_from_snapshot_parts(
        before_snapshot,
        after_snapshot,
        candidate_ids,
        projected_repo,
    )
|
|
435
|
+
|
|
436
|
+
|
|
437
|
+
def validate_post_apply(
    after: permission_snapshot.Snapshot,
    expected_users: dict[str, tuple[str, ...]],
    mutated_repo_ids: set[str],
) -> None:
    """Post-apply sanity gates. Each failure WARNs/ERRORs but does not raise.

    1. Pending bindIDs: any username we just wrote that didn't resolve to a
       real User now appears in `usersWithPendingPermissions`. In our use
       case this should never happen — we enumerate users via the users
       query before mutating — but it's a cheap safety net.

    2. Per-repo expected vs. actual: for every repo we touched, the
       after-snapshot's explicit-user list must equal the union we asked
       for. Disagreement means a partial write, a concurrent mutation by
       another tool, or a server-side bug.

    The per-repo comparison ignores ordering: two lists holding the same
    usernames in a different order are treated as matching, so a pure
    reordering is not reported as a mismatch with nothing missing and
    nothing unexpected.

    Args:
        after: Snapshot captured after the mutations were applied.
        expected_users: Requested usernames per repo ID.
        mutated_repo_ids: IDs of the repos that were written to; a repo with
            no ``expected_users`` entry is expected to have no users.
    """
    requested_usernames = {
        username for usernames in expected_users.values() for username in usernames
    }
    stuck = sorted(requested_usernames.intersection(after["pending_bindIDs"]))
    if stuck:
        log.error(
            "VALIDATION: %d bindID(s) we just wrote did NOT resolve to "
            "real users (now pending): %s",
            len(stuck),
            ", ".join(stuck),
        )

    mismatches = 0
    # Sorted so the warnings come out in the same order on every run.
    for repo_id in sorted(mutated_repo_ids):
        expected = list(expected_users.get(repo_id, ()))
        actual_repo = after["repos"].get(repo_id)
        actual = actual_repo["explicit_permissions_users"] if actual_repo else []
        # Compare as sorted lists: order is irrelevant, but a duplicated
        # username on one side still counts as a difference.
        if sorted(expected) == sorted(actual):
            continue
        mismatches += 1
        expected_set = set(expected)
        actual_set = set(actual)
        only_expected = sorted(expected_set - actual_set)
        only_actual = sorted(actual_set - expected_set)
        log.warning(
            "VALIDATION MISMATCH on repo id=%d: expected %d users, got %d. "
            "Expected-but-missing: %s. Actual-but-unexpected: %s.",
            id_codec.decode_repository_id(repo_id),
            len(expected),
            len(actual),
            only_expected or "(none)",
            only_actual or "(none)",
        )

    if mismatches:
        log.warning(
            "VALIDATION: %d / %d mutated repo(s) do not reflect the requested state.",
            mismatches,
            len(mutated_repo_ids),
        )
    else:
        log.info(
            "VALIDATION OK: all %d mutated repo(s) match the requested explicit-permissions state.",
            len(mutated_repo_ids),
        )
|
|
499
|
+
|
|
500
|
+
|
|
501
|
+
def parse_cli_date(value: str, flag_name: str) -> datetime.datetime:
|
|
502
|
+
"""Parse and validate a CLI date argument, returning UTC midnight."""
|
|
503
|
+
if len(value) != 10 or value[4] != "-" or value[7] != "-":
|
|
504
|
+
raise SystemExit(f"{flag_name} must use YYYY-MM-DD, got {value!r}.")
|
|
505
|
+
try:
|
|
506
|
+
parsed_date = datetime.date.fromisoformat(value)
|
|
507
|
+
except ValueError as error:
|
|
508
|
+
raise SystemExit(f"{flag_name} must use YYYY-MM-DD, got {value!r}.") from error
|
|
509
|
+
return datetime.datetime.combine(parsed_date, datetime.time(), tzinfo=datetime.UTC)
|
|
510
|
+
|
|
511
|
+
|
|
512
|
+
def sourcegraph_datetime_filter(value: datetime.datetime) -> str:
|
|
513
|
+
"""Return a Sourcegraph DateTime filter string for a UTC datetime."""
|
|
514
|
+
return value.isoformat(timespec="seconds").replace("+00:00", "Z")
|
|
515
|
+
|
|
516
|
+
|
|
517
|
+
def user_ids_created_on_or_after(client: src.SourcegraphClient, value: str) -> set[str]:
    """Look up the IDs of Sourcegraph users created on or after a CLI date.

    Args:
        client: Sourcegraph client used for the user listing.
        value: The ``--created-after`` argument, as ``YYYY-MM-DD``.

    Returns:
        The ``id`` of every listed user candidate.

    Raises:
        SystemExit: If ``value`` is not a valid ``YYYY-MM-DD`` date.
    """
    cutoff = parse_cli_date(value, "--created-after")
    matching_users = permissions_sourcegraph.list_site_user_candidates(
        client, sourcegraph_datetime_filter(cutoff)
    )
    log.info(
        "Restricting to %d Sourcegraph user(s) created on or after %s.",
        len(matching_users),
        value,
    )
    return {user["id"] for user in matching_users}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Shared helpers used by auth mapper workflows."""
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
"""Endpoint-scoped artifact path helpers."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import datetime
|
|
6
|
+
import re
|
|
7
|
+
from collections.abc import Generator
|
|
8
|
+
from contextlib import contextmanager
|
|
9
|
+
from contextvars import ContextVar
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from urllib.parse import urlsplit
|
|
12
|
+
|
|
13
|
+
# Directory and file names that make up the artifact layout.
ARTIFACTS_DIR_NAME = "src-auth-perms-sync-runs"
LOG_FILE_NAME = "log.json"
RUNS_DIR_NAME = "runs"

# Run-scoped overrides. Both stay None unless run_artifacts_context() has set
# them, in which case the path/timestamp helpers use these values instead of
# computing their own.
_CURRENT_RUN_ARTIFACTS_DIRECTORY: ContextVar[Path | None] = ContextVar(
    "current_run_artifacts_directory",
    default=None,
)
_CURRENT_RUN_TIMESTAMP: ContextVar[str | None] = ContextVar(
    "current_run_timestamp",
    default=None,
)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def backup_timestamp() -> str:
    """Return a filesystem-friendly UTC timestamp.

    Inside an active run context the run's own timestamp is returned as-is;
    otherwise the current UTC time is formatted as ``YYYY-MM-DD-HH-MM-SS``.
    """
    pinned_timestamp = _CURRENT_RUN_TIMESTAMP.get()
    if pinned_timestamp is None:
        return datetime.datetime.now(tz=datetime.UTC).strftime("%Y-%m-%d-%H-%M-%S")
    return pinned_timestamp
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@contextmanager
def run_artifacts_context(run_directory: Path, timestamp: str) -> Generator[None]:
    """Point the backup helpers at one CLI run's directory and timestamp.

    Both values are set for the duration of the ``with`` block and restored
    to their previous values on exit, including when the block raises.
    """
    previous_directory = _CURRENT_RUN_ARTIFACTS_DIRECTORY.set(run_directory)
    previous_timestamp = _CURRENT_RUN_TIMESTAMP.set(timestamp)
    try:
        yield
    finally:
        # Undo in reverse order of assignment.
        _CURRENT_RUN_TIMESTAMP.reset(previous_timestamp)
        _CURRENT_RUN_ARTIFACTS_DIRECTORY.reset(previous_directory)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def artifact_run_directory(timestamp: str, endpoint: str, command: str) -> Path:
    """Locate the artifact directory of a single command run.

    The result is ``<endpoint artifacts dir>/runs/<timestamp>-<command>``,
    with the last component sanitized for use as a file name.
    """
    run_directory_name = safe_filename_part(f"{timestamp}-{command}")
    return endpoint_artifacts_directory(endpoint).joinpath(RUNS_DIR_NAME, run_directory_name)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def backup_path(
    source_name: str,
    timestamp: str,
    endpoint: str,
    command: str,
    state: str | None = None,
    *,
    suffix: str = "json",
) -> Path:
    """Return an artifact path inside the directory of one endpoint run.

    The directory is the active run context's directory when one is set,
    otherwise the one derived from ``timestamp``, ``endpoint``, and
    ``command``.

    Args:
        source_name: File name used for the artifact when ``state`` is None.
        timestamp: Run timestamp, used only when no run context is active.
        endpoint: Sourcegraph endpoint, used only when no run context is active.
        command: Command name, used only when no run context is active.
        state: Artifact label such as "before"; names the file when given.
        suffix: Extension appended to ``state``; ignored when ``state`` is None.

    Returns:
        ``<directory>/<state>.<suffix>`` when ``state`` is given, otherwise
        ``<directory>/<source_name>``, with the name part sanitized.
    """
    directory = _CURRENT_RUN_ARTIFACTS_DIRECTORY.get()
    if not directory:
        directory = artifact_run_directory(timestamp, endpoint, command)

    if state is not None:
        file_name = f"{safe_filename_part(state)}.{suffix}"
    else:
        file_name = safe_filename_part(source_name)
    return directory / file_name
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def run_log_path(run_directory: Path) -> Path:
    """Return where the structured log lives inside a run artifact directory."""
    return run_directory.joinpath(LOG_FILE_NAME)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def endpoint_artifacts_directory(endpoint: str, current_directory: Path | None = None) -> Path:
    """Return the artifact directory that belongs to one endpoint.

    Args:
        endpoint: Sourcegraph endpoint the artifacts belong to.
        current_directory: Base directory to build under; the process's
            current working directory is used when this is not given.
    """
    if current_directory:
        base_directory = current_directory
    else:
        base_directory = Path.cwd()
    return base_directory.joinpath(ARTIFACTS_DIR_NAME, endpoint_directory_name(endpoint))
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def endpoint_directory_name(endpoint: str) -> str:
    """Derive a filesystem-friendly directory name from a Sourcegraph endpoint.

    The name is the lower-cased hostname, followed by ``-<port>`` when the
    endpoint carries a numeric port, then sanitized for use as a file name.
    When URL parsing finds no hostname, the host and port are instead cut out
    of the text between any ``://`` and the first ``/``.
    """
    parts = urlsplit(endpoint)
    host = parts.hostname
    port = _fallback_endpoint_port(parts.netloc)

    if not host:
        # No hostname from URL parsing: slice the authority out of the raw
        # text ourselves.
        _, scheme_separator, after_scheme = endpoint.partition("://")
        remainder = after_scheme if scheme_separator else endpoint
        authority = remainder.partition("/")[0]
        host = authority.partition(":")[0]
        port = _fallback_endpoint_port(authority)

    name = host.lower()
    if port is not None:
        name = f"{name}-{port}"
    return safe_filename_part(name)
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def endpoint_artifact_path(endpoint: str, path: Path) -> Path:
    """Resolve a user-supplied artifact path.

    Absolute paths are returned untouched; relative paths are placed under
    the endpoint's artifact directory.
    """
    return path if path.is_absolute() else endpoint_artifacts_directory(endpoint) / path
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _fallback_endpoint_port(hostname_and_port: str) -> int | None:
    """Extract the port from a ``host:port`` style string.

    Returns:
        The text after the last ``:`` as an int, or ``None`` when there is no
        ``:`` or that text is not made up solely of decimal digits.
    """
    _, separator, port_text = hostname_and_port.rpartition(":")
    if separator and port_text.isdecimal():
        return int(port_text)
    return None
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def safe_filename_part(value: str) -> str:
    """Sanitize a string for use in backup file names; never returns an empty string.

    Each run of characters other than ASCII letters, digits, ``_``, ``.``,
    and ``-`` becomes a single ``_``; leading and trailing ``.``, ``_``, and
    ``-`` are then stripped. If nothing is left, ``"unknown"`` is returned.
    """
    collapsed = re.sub(r"[^A-Za-z0-9_.-]+", "_", value)
    trimmed = collapsed.strip("._-")
    return trimmed if trimmed else "unknown"
|