src-auth-perms-sync 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. src_auth_perms_sync/__init__.py +1 -0
  2. src_auth_perms_sync/__main__.py +6 -0
  3. src_auth_perms_sync/cli.py +646 -0
  4. src_auth_perms_sync/orgs/__init__.py +1 -0
  5. src_auth_perms_sync/orgs/command.py +7 -0
  6. src_auth_perms_sync/orgs/queries.py +44 -0
  7. src_auth_perms_sync/orgs/sync.py +1167 -0
  8. src_auth_perms_sync/orgs/types.py +103 -0
  9. src_auth_perms_sync/permissions/__init__.py +1 -0
  10. src_auth_perms_sync/permissions/apply.py +420 -0
  11. src_auth_perms_sync/permissions/command.py +918 -0
  12. src_auth_perms_sync/permissions/full_set.py +880 -0
  13. src_auth_perms_sync/permissions/mapping.py +627 -0
  14. src_auth_perms_sync/permissions/maps.py +291 -0
  15. src_auth_perms_sync/permissions/queries.py +180 -0
  16. src_auth_perms_sync/permissions/restore.py +913 -0
  17. src_auth_perms_sync/permissions/snapshot.py +1502 -0
  18. src_auth_perms_sync/permissions/sourcegraph.py +392 -0
  19. src_auth_perms_sync/permissions/types.py +116 -0
  20. src_auth_perms_sync/permissions/workflow.py +526 -0
  21. src_auth_perms_sync/shared/__init__.py +1 -0
  22. src_auth_perms_sync/shared/backups.py +119 -0
  23. src_auth_perms_sync/shared/id_codec.py +67 -0
  24. src_auth_perms_sync/shared/queries.py +65 -0
  25. src_auth_perms_sync/shared/run_context.py +34 -0
  26. src_auth_perms_sync/shared/saml_groups.py +267 -0
  27. src_auth_perms_sync/shared/site_config.py +366 -0
  28. src_auth_perms_sync/shared/sourcegraph.py +69 -0
  29. src_auth_perms_sync/shared/types.py +69 -0
  30. src_auth_perms_sync-0.2.1.dist-info/METADATA +256 -0
  31. src_auth_perms_sync-0.2.1.dist-info/RECORD +34 -0
  32. src_auth_perms_sync-0.2.1.dist-info/WHEEL +4 -0
  33. src_auth_perms_sync-0.2.1.dist-info/entry_points.txt +2 -0
  34. src_auth_perms_sync-0.2.1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,526 @@
1
+ """Shared helpers for repo permission command workflows."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import datetime
6
+ import logging
7
+ from collections.abc import Iterator
8
+ from pathlib import Path
9
+
10
+ import src_py_lib as src
11
+
12
+ from ..shared import backups, id_codec, saml_groups
13
+ from ..shared import sourcegraph as shared_sourcegraph
14
+ from ..shared import types as shared_types
15
+ from . import mapping as permissions_mapping
16
+ from . import maps as permissions_maps
17
+ from . import snapshot as permission_snapshot
18
+ from . import sourcegraph as permissions_sourcegraph
19
+ from . import types as permission_types
20
+
21
+ log = logging.getLogger(__name__)
22
+
23
+
24
def load_discovery(
    client: src.SourcegraphClient,
    saml_groups_attribute_name_by_config_id: dict[str, str],
) -> tuple[
    list[shared_types.AuthProvider],
    list[permission_types.ExternalService],
    dict[tuple[str, str], str],
]:
    """Collect the instance state that both --get and --set start from.

    Lists the auth providers and the external services through ``client``
    (logging a request line and a count line for each), then resolves the
    SAML attribute-name map from those providers plus the supplied
    per-config-id overrides.

    Returns:
        ``(providers, services, saml_attribute_names)``. The two lists are
        returned raw so each caller can reshape them itself (YAML form for
        --get, keyed-by-id dict for --set).

    Doing this in one place keeps the two commands from drifting apart on
    which providers/services they consider authoritative and on how the
    per-provider SAML attribute override map is resolved.
    """
    log.info("Querying auth providers from %s ...", client.endpoint)
    auth_providers = shared_sourcegraph.list_auth_providers(client)
    log.info("Received %d auth providers.", len(auth_providers))

    log.info("Loading external services from %s ...", client.endpoint)
    external_services = permissions_sourcegraph.list_external_services(client)
    log.info("Received %d external services.", len(external_services))

    attribute_names_by_provider = saml_groups.attribute_names_by_provider_key(
        auth_providers,
        saml_groups_attribute_name_by_config_id,
    )
    return auth_providers, external_services, attribute_names_by_provider
54
+
55
+
56
def load_repos_by_external_service(
    client: src.SourcegraphClient,
    services_by_id: dict[int, permission_types.ExternalService],
) -> dict[int, list[permission_types.Repository]]:
    """Fetch the repo list of every discovered code host connection, once each.

    Connections are visited in ascending id order. The whole load runs inside
    a ``load_repos_by_external_service`` event that records the connection
    count, the expected repo count (sum of each service's ``repoCount``) and
    the number of repos actually received.

    Returns:
        A dict keyed by the same ids as ``services_by_id``, each mapping to
        the repos listed for that connection.
    """
    connection_count = len(services_by_id)
    with src.event(
        "load_repos_by_external_service",
        external_service_count=connection_count,
    ) as load_event:
        expected_repo_count = 0
        for known_service in services_by_id.values():
            expected_repo_count += known_service["repoCount"]
        load_event["expected_repo_count"] = expected_repo_count
        log.info(
            "Loading about %d repo(s) across %d code host connection(s) ...",
            expected_repo_count,
            connection_count,
        )

        loaded: dict[int, list[permission_types.Repository]] = {}
        for external_service_id, service in sorted(
            services_by_id.items(), key=lambda entry: entry[0]
        ):
            # The listing call takes the service's own "id" field, not the
            # integer key this dict is indexed by.
            service_repos = permissions_sourcegraph.list_repos_for_external_service(
                client, service["id"]
            )
            loaded[external_service_id] = service_repos
            log.info(
                "Received %d repo(s) for code host connection %s (id=%d).",
                len(service_repos),
                service["displayName"],
                external_service_id,
            )
        load_event["repo_count"] = sum(len(batch) for batch in loaded.values())
        return loaded
88
+
89
+
90
def index_repos_by_id(
    repos_by_external_service_id: dict[int, list[permission_types.Repository]],
) -> dict[str, permission_types.Repository]:
    """Flatten the per-connection repo lists into one dict keyed by repo ``id``.

    When the same repo id appears under several connections, the entry seen
    last (in the input dict's iteration order) is the one kept.
    """
    return {
        repo["id"]: repo
        for connection_repos in repos_by_external_service_id.values()
        for repo in connection_repos
    }
98
+
99
+
100
def load_mapping_rules(input_path: Path) -> list[permission_types.MappingRule]:
    """Read the maps YAML at ``input_path`` and return its validated rules.

    A missing or empty ``maps`` key yields an empty list, and validation is
    skipped in that case. Otherwise the rules are passed through
    ``permissions_mapping.validate_mapping_rules`` before being returned.
    """
    maps_config = permissions_maps.load_maps_yaml(input_path)
    rules = maps_config.get("maps")
    if not rules:
        return []
    permissions_mapping.validate_mapping_rules(rules)
    return rules
107
+
108
+
109
def load_mapping_context(
    client: src.SourcegraphClient,
    input_path: Path,
    saml_groups_attribute_name_by_config_id: dict[str, str],
) -> permission_types.MappingContext | None:
    """Build the permission-planning context from a maps file.

    Loads the mapping rules from ``input_path`` and, when there are any,
    hands them to ``load_mapping_context_for_rules``.

    Returns:
        The mapping context, or ``None`` (after a warning) when the file
        defines no maps.
    """
    rules = load_mapping_rules(input_path)
    if rules:
        return load_mapping_context_for_rules(
            client,
            rules,
            saml_groups_attribute_name_by_config_id,
        )

    log.warning("No maps defined in %s — nothing to do.", input_path)
    return None
125
+
126
+
127
def load_mapping_context_for_rules(
    client: src.SourcegraphClient,
    mapping_rules: list[permission_types.MappingRule],
    saml_groups_attribute_name_by_config_id: dict[str, str],
) -> permission_types.MappingContext:
    """Assemble a ``MappingContext`` for rules that are already loaded.

    Runs discovery, keys the external services by their decoded id, loads
    the repos behind each service, indexes those repos by repo id, and warns
    about any service ids the rules reference that discovery did not return.
    """
    providers, services, attribute_names = load_discovery(
        client,
        saml_groups_attribute_name_by_config_id,
    )

    services_by_id: dict[int, permission_types.ExternalService] = {}
    for service in services:
        decoded_id = id_codec.decode_external_service_id(service["id"])
        services_by_id[decoded_id] = service

    repos_by_service = load_repos_by_external_service(client, services_by_id)
    repos_by_id = index_repos_by_id(repos_by_service)
    log.info(
        "Received %d unique repo(s) across %d code host connection(s).",
        len(repos_by_id),
        len(services_by_id),
    )

    warn_unknown_external_services(mapping_rules, services_by_id)
    return permission_types.MappingContext(
        mapping_rules=mapping_rules,
        providers=providers,
        saml_groups_attribute_names=attribute_names,
        services_by_id=services_by_id,
        repos_by_external_service_id=repos_by_service,
        all_repos_by_id=repos_by_id,
    )
155
+
156
+
157
def warn_unknown_external_services(
    mapping_rules: list[permission_types.MappingRule],
    services_by_id: dict[int, permission_types.ExternalService],
) -> None:
    """Log a warning for each referenced connection id the instance lacks.

    The ids referenced by ``mapping_rules`` are checked in sorted order
    against the keys of ``services_by_id``. Nothing is raised; per the
    warning text, rules using a missing id resolve to zero repos.
    """
    referenced_ids = permissions_mapping.referenced_external_service_ids(mapping_rules)
    missing_ids = [
        referenced_id
        for referenced_id in sorted(referenced_ids)
        if referenced_id not in services_by_id
    ]
    for missing_id in missing_ids:
        log.warning(
            "External service id %s is referenced by the maps but is not "
            "present on the instance — rules using it will resolve to "
            "zero repos.",
            missing_id,
        )
172
+
173
+
174
def snapshot_path(
    input_path: Path,
    timestamp: str,
    endpoint: str,
    command: str,
    state: str | None = None,
) -> Path:
    """Build an artifact path for this run from the maps file's base name.

    Only ``input_path.name`` is used; the rest is delegated to
    ``backups.backup_path``.

    Example: maps.yaml + endpoint + timestamp + set-apply + before →
    src-auth-perms-sync-runs/sourcegraph.example.com/runs/2026-04-27-01-54-23-set-apply/before.json.
    """
    source_name = input_path.name
    return backups.backup_path(source_name, timestamp, endpoint, command, state)
187
+
188
+
189
def write_snapshot_pair(
    input_path: Path,
    timestamp: str,
    endpoint: str,
    command: str,
    before_snapshot: permission_snapshot.Snapshot,
    after_snapshot: permission_snapshot.Snapshot,
) -> tuple[Path, Path, Path]:
    """Write the before snapshot, the after snapshot, and their diff.

    Both snapshot paths are resolved first; the files are then written in
    the order before, after, diff.

    Returns:
        ``(before_path, after_path, diff_path)``.
    """
    snapshots_by_state = {"before": before_snapshot, "after": after_snapshot}
    paths_by_state = {
        state: snapshot_path(input_path, timestamp, endpoint, command, state)
        for state in snapshots_by_state
    }
    for state, snapshot in snapshots_by_state.items():
        permission_snapshot.write_snapshot(paths_by_state[state], snapshot)

    diff_path = write_snapshot_diff_file(
        input_path,
        timestamp,
        endpoint,
        command,
        before_snapshot,
        after_snapshot,
    )
    return paths_by_state["before"], paths_by_state["after"], diff_path
210
+
211
+
212
def write_snapshot_diff_file(
    input_path: Path,
    timestamp: str,
    endpoint: str,
    command: str,
    before_snapshot: permission_snapshot.Snapshot,
    after_snapshot: permission_snapshot.Snapshot,
) -> Path:
    """Write the diff between two full snapshots and return the file's path.

    The destination is this run's ``diff`` artifact path.
    """
    destination = snapshot_path(input_path, timestamp, endpoint, command, "diff")
    permission_snapshot.write_snapshot_diff_from_snapshots(
        destination, before_snapshot, after_snapshot
    )
    return destination
227
+
228
+
229
def write_user_scoped_snapshot_diff_file(
    input_path: Path,
    timestamp: str,
    endpoint: str,
    command: str,
    before_snapshot: permission_snapshot.UserScopedSnapshot,
    after_snapshot: permission_snapshot.UserScopedSnapshot,
) -> Path:
    """Write the diff between two user-scoped snapshots and return its path.

    The diff is built with ``build_user_scoped_snapshot_diff`` and written to
    this run's ``diff`` artifact path.
    """
    destination = snapshot_path(input_path, timestamp, endpoint, command, "diff")
    user_scoped_diff = permission_snapshot.build_user_scoped_snapshot_diff(
        before_snapshot,
        after_snapshot,
    )
    permission_snapshot.write_user_scoped_snapshot_diff(destination, user_scoped_diff)
    return destination
243
+
244
+
245
def maps_backup_path(
    input_path: Path,
    timestamp: str,
    endpoint: str,
    command: str,
) -> Path:
    """Return where the companion copy of the maps YAML goes for a backup run.

    Delegates to ``backups.backup_path`` with the file's base name, no
    ``state``, and ``suffix="yaml"``.
    """
    return backups.backup_path(
        input_path.name,
        timestamp,
        endpoint,
        command,
        None,
        suffix="yaml",
    )
253
+
254
+
255
def write_maps_backup(
    input_path: Path,
    timestamp: str,
    endpoint: str,
    command: str,
) -> Path | None:
    """Copy the active maps YAML beside the run's snapshots, for auditability.

    The copy is byte-for-byte and is wrapped in a DEBUG ``disk_io`` event
    that records the number of bytes written. The destination's parent
    directories are created if needed.

    Returns:
        The path written, or ``None`` (after a warning) when ``input_path``
        does not exist.
    """
    if input_path.exists():
        destination = maps_backup_path(input_path, timestamp, endpoint, command)
        with src.event(
            "disk_io",
            level="DEBUG",
            op="write",
            path=str(destination),
            file_kind="yaml",
        ) as disk_event:
            payload = input_path.read_bytes()
            destination.parent.mkdir(parents=True, exist_ok=True)
            destination.write_bytes(payload)
            disk_event["bytes"] = len(payload)
        log.info("Wrote maps backup: %s", destination)
        return destination

    log.warning("Could not back up maps file %s because it does not exist.", input_path)
    return None
280
+
281
+
282
def projected_snapshot_repo_ids(
    before_snapshot: permission_snapshot.Snapshot,
    expected_users: dict[str, tuple[str, ...]],
) -> list[str]:
    """List every repo ID a projected full-set after snapshot could contain.

    That is the sorted, de-duplicated union of the repo IDs already in
    ``before_snapshot["repos"]`` and the repo IDs keyed in ``expected_users``.
    """
    candidate_ids = set(before_snapshot["repos"])
    candidate_ids.update(expected_users)
    return sorted(candidate_ids)
288
+
289
+
290
def projected_snapshot_repo_for_id(
    before_snapshot: permission_snapshot.Snapshot,
    expected_users: dict[str, tuple[str, ...]],
    repo_names: dict[str, str],
    repo_id: str,
) -> permission_snapshot.RepoSnapshot | None:
    """Project a single repo's after-state without cloning the whole snapshot.

    * Repo not keyed in ``expected_users``: its entry from
      ``before_snapshot["repos"]`` is returned unchanged (``None`` if absent).
    * Repo keyed in ``expected_users`` with an empty tuple: ``None``, i.e.
      the repo drops out of the projection.
    * Otherwise: a new entry named from ``repo_names[repo_id]`` whose
      explicit users are the expected usernames.
    """
    if repo_id not in expected_users:
        return before_snapshot["repos"].get(repo_id)

    planned_usernames = expected_users[repo_id]
    if planned_usernames:
        return {
            "name": repo_names[repo_id],
            "explicit_permissions_users": list(planned_usernames),
        }
    return None
306
+
307
+
308
def projected_snapshot_repos(
    before_snapshot: permission_snapshot.Snapshot,
    expected_users: dict[str, tuple[str, ...]],
    repo_names: dict[str, str],
) -> Iterator[tuple[str, permission_snapshot.RepoSnapshot]]:
    """Yield ``(repo_id, repo)`` for the projected snapshot, one repo at a time.

    Repo IDs come from ``projected_snapshot_repo_ids`` (sorted order); IDs
    whose projection is ``None`` are skipped.
    """
    ordered_ids = projected_snapshot_repo_ids(before_snapshot, expected_users)
    for repo_id in ordered_ids:
        projected = projected_snapshot_repo_for_id(
            before_snapshot, expected_users, repo_names, repo_id
        )
        if projected is None:
            continue
        yield repo_id, projected
323
+
324
+
325
def projected_snapshot_stats(
    before_snapshot: permission_snapshot.Snapshot,
    expected_users: dict[str, tuple[str, ...]],
) -> permission_snapshot.SnapshotStats:
    """Compute the projected snapshot's stats without building the snapshot.

    A repo contributes its user list when it is either in the before
    snapshot and not overridden by ``expected_users``, or in
    ``expected_users`` with a non-empty user tuple. ``total_users_scanned``
    is carried over from the before snapshot unchanged.
    """

    def projected_user_lists():
        # Before-snapshot repos that the plan does not touch keep their users.
        for repo_id, repo in before_snapshot["repos"].items():
            if repo_id not in expected_users:
                yield repo["explicit_permissions_users"]
        # Planned repos replace whatever was there; empty plans drop the repo.
        for planned_usernames in expected_users.values():
            if planned_usernames:
                yield planned_usernames

    distinct_users: set[str] = set()
    grant_total = 0
    repo_total = 0
    for usernames in projected_user_lists():
        repo_total += 1
        grant_total += len(usernames)
        distinct_users.update(usernames)

    return {
        "total_users_scanned": before_snapshot["stats"]["total_users_scanned"],
        "users_with_explicit_grants": len(distinct_users),
        "repos_with_explicit_grants": repo_total,
        "total_grants": grant_total,
    }
352
+
353
+
354
def projected_snapshot_shell(
    before_snapshot: permission_snapshot.Snapshot,
    expected_users: dict[str, tuple[str, ...]],
) -> permission_snapshot.Snapshot:
    """Build the projected snapshot's metadata with an empty ``repos`` map.

    Everything except ``captured_at`` (the current UTC time, to the second),
    ``stats`` (recomputed for the projection) and ``repos`` (left empty
    because repo entries are streamed separately) is copied from the before
    snapshot. ``pending_bindIDs`` is copied into a new list.
    """
    captured_at = datetime.datetime.now(datetime.UTC).isoformat(timespec="seconds")
    pending_bind_ids = list(before_snapshot["pending_bindIDs"])
    projected_stats = projected_snapshot_stats(before_snapshot, expected_users)
    # Key order is kept as-is: it determines the order of keys in any
    # serialized form of this dict.
    return {
        "schema_version": before_snapshot["schema_version"],
        "captured_at": captured_at,
        "endpoint": before_snapshot["endpoint"],
        "bindID_mode": before_snapshot["bindID_mode"],
        "config_file": before_snapshot["config_file"],
        "config_sha256": before_snapshot["config_sha256"],
        "pending_bindIDs": pending_bind_ids,
        "stats": projected_stats,
        "repos": {},
    }
370
+
371
+
372
def write_projected_snapshot(
    path: Path,
    before_snapshot: permission_snapshot.Snapshot,
    expected_users: dict[str, tuple[str, ...]],
    repo_names: dict[str, str],
) -> permission_snapshot.Snapshot:
    """Write a projected full-set after snapshot to ``path``.

    The metadata shell and an iterator of projected repo entries are handed
    to ``write_snapshot_with_repos`` separately, so the full projected
    snapshot is never assembled here.

    Returns:
        The metadata shell only; its ``repos`` map is empty.
    """
    shell = projected_snapshot_shell(before_snapshot, expected_users)
    repo_entries = projected_snapshot_repos(before_snapshot, expected_users, repo_names)
    permission_snapshot.write_snapshot_with_repos(path, shell, repo_entries)
    return shell
386
+
387
+
388
def write_projected_snapshot_diff_file(
    input_path: Path,
    timestamp: str,
    endpoint: str,
    command: str,
    before_snapshot: permission_snapshot.Snapshot,
    after_snapshot: permission_snapshot.Snapshot,
    expected_users: dict[str, tuple[str, ...]],
    repo_names: dict[str, str],
) -> Path:
    """Write the diff for a projected full-set after snapshot; return its path.

    The after side's repo entries are not read from ``after_snapshot``.
    They are produced per repo ID by a lookup callback built on
    ``projected_snapshot_repo_for_id``.
    """

    def projected_repo(repo_id: str) -> permission_snapshot.RepoSnapshot | None:
        return projected_snapshot_repo_for_id(
            before_snapshot, expected_users, repo_names, repo_id
        )

    destination = snapshot_path(input_path, timestamp, endpoint, command, "diff")
    candidate_ids = projected_snapshot_repo_ids(before_snapshot, expected_users)
    permission_snapshot.write_snapshot_diff_from_snapshot_parts(
        destination,
        before_snapshot,
        after_snapshot,
        candidate_ids,
        projected_repo,
    )
    return destination
414
+
415
+
416
def render_projected_snapshot_diff(
    before_snapshot: permission_snapshot.Snapshot,
    after_snapshot: permission_snapshot.Snapshot,
    expected_users: dict[str, tuple[str, ...]],
    repo_names: dict[str, str],
) -> str:
    """Render, as text, a capped diff for a projected full-set after snapshot.

    Rendering is delegated to ``render_snapshot_diff_from_snapshot_parts``.
    The after side's repo entries are supplied per repo ID by a lookup
    callback built on ``projected_snapshot_repo_for_id``.
    """

    def projected_repo(repo_id: str) -> permission_snapshot.RepoSnapshot | None:
        return projected_snapshot_repo_for_id(
            before_snapshot, expected_users, repo_names, repo_id
        )

    candidate_ids = projected_snapshot_repo_ids(before_snapshot, expected_users)
    return permission_snapshot.render_snapshot_diff_from_snapshot_parts(
        before_snapshot,
        after_snapshot,
        candidate_ids,
        projected_repo,
    )
435
+
436
+
437
def validate_post_apply(
    after: permission_snapshot.Snapshot,
    expected_users: dict[str, tuple[str, ...]],
    mutated_repo_ids: set[str],
) -> None:
    """Post-apply sanity gates. Each failure WARNs/ERRORs but does not raise.

    1. Pending bindIDs: any username we just wrote that didn't resolve to a
       real User now appears in `usersWithPendingPermissions`. In our use
       case this should never happen — we enumerate users via the users
       query before mutating — but it's a cheap safety net.

    2. Per-repo expected vs. actual: for every repo we touched, the
       after-snapshot's explicit users must equal the union we asked for.
       Disagreement means a partial write, a concurrent mutation by
       another tool, or a server-side bug.

    The per-repo comparison is by set membership, not list order: explicit
    grants are an unordered collection, and comparing lists would report a
    mismatch whose "missing" and "unexpected" lists are both empty whenever
    the snapshot returns the same users in a different order.

    Args:
        after: Snapshot captured after the apply; ``pending_bindIDs`` and
            ``repos`` are read.
        expected_users: Requested usernames per repo ID. A repo that is in
            ``mutated_repo_ids`` but not here is expected to have no users.
        mutated_repo_ids: IDs of the repos that were written.
    """
    requested_usernames = {
        username for usernames in expected_users.values() for username in usernames
    }
    stuck = sorted(requested_usernames.intersection(after["pending_bindIDs"]))
    if stuck:
        log.error(
            "VALIDATION: %d bindID(s) we just wrote did NOT resolve to "
            "real users (now pending): %s",
            len(stuck),
            ", ".join(stuck),
        )

    mismatches = 0
    # Sorted so the warnings appear in the same order on every run; iterating
    # the set directly would vary with string hashing.
    for repo_id in sorted(mutated_repo_ids):
        expected_set = set(expected_users.get(repo_id, ()))
        actual_repo = after["repos"].get(repo_id)
        # A repo missing from the after snapshot counts as "no explicit users".
        actual_set = set(actual_repo["explicit_permissions_users"]) if actual_repo else set()
        if expected_set == actual_set:
            continue
        mismatches += 1
        log.warning(
            "VALIDATION MISMATCH on repo id=%d: expected %d users, got %d. "
            "Expected-but-missing: %s. Actual-but-unexpected: %s.",
            id_codec.decode_repository_id(repo_id),
            len(expected_set),
            len(actual_set),
            sorted(expected_set - actual_set) or "(none)",
            sorted(actual_set - expected_set) or "(none)",
        )

    if mismatches:
        log.warning(
            "VALIDATION: %d / %d mutated repo(s) do not reflect the requested state.",
            mismatches,
            len(mutated_repo_ids),
        )
    else:
        log.info(
            "VALIDATION OK: all %d mutated repo(s) match the requested explicit-permissions state.",
            len(mutated_repo_ids),
        )
499
+
500
+
501
+ def parse_cli_date(value: str, flag_name: str) -> datetime.datetime:
502
+ """Parse and validate a CLI date argument, returning UTC midnight."""
503
+ if len(value) != 10 or value[4] != "-" or value[7] != "-":
504
+ raise SystemExit(f"{flag_name} must use YYYY-MM-DD, got {value!r}.")
505
+ try:
506
+ parsed_date = datetime.date.fromisoformat(value)
507
+ except ValueError as error:
508
+ raise SystemExit(f"{flag_name} must use YYYY-MM-DD, got {value!r}.") from error
509
+ return datetime.datetime.combine(parsed_date, datetime.time(), tzinfo=datetime.UTC)
510
+
511
+
512
def sourcegraph_datetime_filter(value: datetime.datetime) -> str:
    """Format a UTC datetime as a Sourcegraph DateTime filter string.

    The value is rendered in ISO 8601 to whole seconds, and a ``+00:00``
    offset is rewritten as ``Z``. Any other offset, or no offset at all, is
    left exactly as ``isoformat`` produced it.
    """
    iso_text = value.isoformat(timespec="seconds")
    return iso_text.replace("+00:00", "Z")
515
+
516
+
517
def user_ids_created_on_or_after(client: src.SourcegraphClient, value: str) -> set[str]:
    """Return the IDs of Sourcegraph users created on or after a CLI date.

    ``value`` is parsed as the ``--created-after`` flag (so a bad date exits
    with a message naming that flag), converted to a DateTime filter string,
    and used to list the site's user candidates. The candidate count is
    logged before the IDs are collected.
    """
    created_after = parse_cli_date(value, "--created-after")
    filter_value = sourcegraph_datetime_filter(created_after)
    candidates = permissions_sourcegraph.list_site_user_candidates(client, filter_value)
    log.info(
        "Restricting to %d Sourcegraph user(s) created on or after %s.",
        len(candidates),
        value,
    )

    user_ids: set[str] = set()
    for candidate in candidates:
        user_ids.add(candidate["id"])
    return user_ids
@@ -0,0 +1 @@
1
+ """Shared helpers used by auth mapper workflows."""
@@ -0,0 +1,119 @@
1
+ """Endpoint-scoped artifact path helpers."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import datetime
6
+ import re
7
+ from collections.abc import Generator
8
+ from contextlib import contextmanager
9
+ from contextvars import ContextVar
10
+ from pathlib import Path
11
+ from urllib.parse import urlsplit
12
+
13
+ ARTIFACTS_DIR_NAME = "src-auth-perms-sync-runs"
14
+ LOG_FILE_NAME = "log.json"
15
+ RUNS_DIR_NAME = "runs"
16
+
17
+ _CURRENT_RUN_ARTIFACTS_DIRECTORY: ContextVar[Path | None] = ContextVar(
18
+ "current_run_artifacts_directory",
19
+ default=None,
20
+ )
21
+ _CURRENT_RUN_TIMESTAMP: ContextVar[str | None] = ContextVar(
22
+ "current_run_timestamp",
23
+ default=None,
24
+ )
25
+
26
+
27
def backup_timestamp() -> str:
    """Return a filesystem-friendly UTC timestamp.

    If a run timestamp has been set for the current context, it is returned
    as-is. Otherwise the current UTC time is formatted as
    ``YYYY-MM-DD-HH-MM-SS``.
    """
    pinned_timestamp = _CURRENT_RUN_TIMESTAMP.get()
    if pinned_timestamp is None:
        now_utc = datetime.datetime.now(datetime.UTC)
        return now_utc.strftime("%Y-%m-%d-%H-%M-%S")
    return pinned_timestamp
33
+
34
+
35
@contextmanager
def run_artifacts_context(run_directory: Path, timestamp: str) -> Generator[None]:
    """Point the backup helpers at one CLI run for the duration of a ``with``.

    Sets the current run directory and run timestamp context variables on
    entry and restores their previous values on exit, in reverse order, even
    if the body raises.
    """
    restore_directory = _CURRENT_RUN_ARTIFACTS_DIRECTORY.set(run_directory)
    restore_timestamp = _CURRENT_RUN_TIMESTAMP.set(timestamp)
    try:
        yield
    finally:
        # Undo in the opposite order to the sets above.
        _CURRENT_RUN_TIMESTAMP.reset(restore_timestamp)
        _CURRENT_RUN_ARTIFACTS_DIRECTORY.reset(restore_directory)
45
+
46
+
47
def artifact_run_directory(timestamp: str, endpoint: str, command: str) -> Path:
    """Return the directory that holds the artifacts of one command run.

    Layout: ``<endpoint artifacts dir>/<RUNS_DIR_NAME>/<timestamp>-<command>``,
    with the last component sanitized for use as a file name.
    """
    run_name = safe_filename_part(f"{timestamp}-{command}")
    endpoint_root = endpoint_artifacts_directory(endpoint)
    return endpoint_root / RUNS_DIR_NAME / run_name
51
+
52
+
53
def backup_path(
    source_name: str,
    timestamp: str,
    endpoint: str,
    command: str,
    state: str | None = None,
    *,
    suffix: str = "json",
) -> Path:
    """Return an artifact path inside the directory of one endpoint run.

    The directory is the current run's artifact directory when one has been
    set for this context; otherwise it is derived from ``timestamp``,
    ``endpoint`` and ``command``.

    The file name is ``<state>.<suffix>`` when ``state`` is given. Without a
    ``state`` it is the sanitized ``source_name`` and ``suffix`` is not used.
    """
    directory = _CURRENT_RUN_ARTIFACTS_DIRECTORY.get()
    if not directory:
        directory = artifact_run_directory(timestamp, endpoint, command)

    if state is not None:
        file_name = f"{safe_filename_part(state)}.{suffix}"
    else:
        file_name = safe_filename_part(source_name)
    return directory / file_name
71
+
72
+
73
def run_log_path(run_directory: Path) -> Path:
    """Return where the structured log lives inside a run artifact directory.

    The file is named by ``LOG_FILE_NAME``.
    """
    return run_directory.joinpath(LOG_FILE_NAME)
76
+
77
+
78
def endpoint_artifacts_directory(endpoint: str, current_directory: Path | None = None) -> Path:
    """Return the artifact directory that belongs to ``endpoint``.

    Layout: ``<base>/<ARTIFACTS_DIR_NAME>/<endpoint directory name>``, where
    ``<base>`` is ``current_directory`` when given and the process's current
    working directory otherwise.
    """
    base_directory = current_directory if current_directory else Path.cwd()
    return base_directory.joinpath(ARTIFACTS_DIR_NAME, endpoint_directory_name(endpoint))
82
+
83
+
84
def endpoint_directory_name(endpoint: str) -> str:
    """Derive a filesystem-friendly directory name from a Sourcegraph endpoint.

    The name is the lower-cased hostname, followed by ``-<port>`` when the
    endpoint carries a numeric port, then sanitized with
    ``safe_filename_part``.

    When ``urlsplit`` finds no hostname, the host and port are instead taken
    from the text between any ``://`` and the first ``/``.
    """
    split_endpoint = urlsplit(endpoint)
    hostname = split_endpoint.hostname
    if hostname:
        port = _fallback_endpoint_port(split_endpoint.netloc)
    else:
        # Manual fallback: strip an optional scheme, keep the authority part.
        remainder = endpoint.split("://", 1)[-1]
        host_and_port = remainder.partition("/")[0]
        hostname = host_and_port.partition(":")[0]
        port = _fallback_endpoint_port(host_and_port)

    name = hostname.lower()
    if port is None:
        return safe_filename_part(name)
    return safe_filename_part(f"{name}-{port}")
98
+
99
+
100
def endpoint_artifact_path(endpoint: str, path: Path) -> Path:
    """Resolve a user-supplied artifact path.

    Absolute paths are returned untouched; relative paths are placed under
    the endpoint's artifact directory.
    """
    return path if path.is_absolute() else endpoint_artifacts_directory(endpoint) / path
105
+
106
+
107
+ def _fallback_endpoint_port(hostname_and_port: str) -> int | None:
108
+ """Parse a port from an endpoint netloc that urlsplit could not fully parse."""
109
+ if ":" not in hostname_and_port:
110
+ return None
111
+ raw_port = hostname_and_port.rsplit(":", 1)[1]
112
+ if not raw_port.isdecimal():
113
+ return None
114
+ return int(raw_port)
115
+
116
+
117
def safe_filename_part(value: str) -> str:
    """Return a non-empty string that is safe to use in a backup file name.

    Each run of characters outside ``A-Z a-z 0-9 _ . -`` becomes a single
    ``_``; leading and trailing ``.``, ``_`` and ``-`` are then stripped. If
    nothing is left, ``"unknown"`` is returned.
    """
    collapsed = re.sub(r"[^A-Za-z0-9_.-]+", "_", value)
    trimmed = collapsed.strip("._-")
    if trimmed:
        return trimmed
    return "unknown"