src-auth-perms-sync 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. src_auth_perms_sync/__init__.py +1 -0
  2. src_auth_perms_sync/__main__.py +6 -0
  3. src_auth_perms_sync/cli.py +646 -0
  4. src_auth_perms_sync/orgs/__init__.py +1 -0
  5. src_auth_perms_sync/orgs/command.py +7 -0
  6. src_auth_perms_sync/orgs/queries.py +44 -0
  7. src_auth_perms_sync/orgs/sync.py +1167 -0
  8. src_auth_perms_sync/orgs/types.py +103 -0
  9. src_auth_perms_sync/permissions/__init__.py +1 -0
  10. src_auth_perms_sync/permissions/apply.py +420 -0
  11. src_auth_perms_sync/permissions/command.py +918 -0
  12. src_auth_perms_sync/permissions/full_set.py +880 -0
  13. src_auth_perms_sync/permissions/mapping.py +627 -0
  14. src_auth_perms_sync/permissions/maps.py +291 -0
  15. src_auth_perms_sync/permissions/queries.py +180 -0
  16. src_auth_perms_sync/permissions/restore.py +913 -0
  17. src_auth_perms_sync/permissions/snapshot.py +1502 -0
  18. src_auth_perms_sync/permissions/sourcegraph.py +392 -0
  19. src_auth_perms_sync/permissions/types.py +116 -0
  20. src_auth_perms_sync/permissions/workflow.py +526 -0
  21. src_auth_perms_sync/shared/__init__.py +1 -0
  22. src_auth_perms_sync/shared/backups.py +119 -0
  23. src_auth_perms_sync/shared/id_codec.py +67 -0
  24. src_auth_perms_sync/shared/queries.py +65 -0
  25. src_auth_perms_sync/shared/run_context.py +34 -0
  26. src_auth_perms_sync/shared/saml_groups.py +267 -0
  27. src_auth_perms_sync/shared/site_config.py +366 -0
  28. src_auth_perms_sync/shared/sourcegraph.py +69 -0
  29. src_auth_perms_sync/shared/types.py +69 -0
  30. src_auth_perms_sync-0.2.1.dist-info/METADATA +256 -0
  31. src_auth_perms_sync-0.2.1.dist-info/RECORD +34 -0
  32. src_auth_perms_sync-0.2.1.dist-info/WHEEL +4 -0
  33. src_auth_perms_sync-0.2.1.dist-info/entry_points.txt +2 -0
  34. src_auth_perms_sync-0.2.1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,392 @@
1
+ """Sourcegraph GraphQL list helpers for repo-permission sync."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ from collections.abc import Iterable, Iterator, Sequence
7
+ from typing import Any, cast
8
+
9
+ import src_py_lib as src
10
+
11
+ from ..shared import id_codec
12
+ from ..shared import sourcegraph as shared_sourcegraph
13
+ from ..shared import types as shared_types
14
+ from . import queries
15
+ from . import types as permission_types
16
+
17
+ log = logging.getLogger(__name__)
18
+
19
+
20
def list_external_services(client: src.SourcegraphClient) -> list[permission_types.ExternalService]:
    """Return every node of the `externalServices` connection as a list.

    Nodes are streamed from `client.stream_connection_nodes` using the shared
    default page size and collected eagerly.
    """
    external_services: list[permission_types.ExternalService] = []
    node_stream = client.stream_connection_nodes(
        queries.QUERY_EXTERNAL_SERVICES,
        connection_path=("externalServices",),
        page_size=shared_sourcegraph.DEFAULT_PAGE_SIZE,
    )
    for service_node in node_stream:
        # `cast` only informs the type checker; the node is passed through unchanged.
        external_services.append(cast(permission_types.ExternalService, service_node))
    return external_services
29
+
30
+
31
def list_repos_for_external_service(
    client: src.SourcegraphClient, external_service_id: str
) -> list[permission_types.Repository]:
    """Return every node of the `repositories` connection for one external service.

    `external_service_id` is sent to the query as the `esID` variable.
    """
    repositories: list[permission_types.Repository] = []
    node_stream = client.stream_connection_nodes(
        queries.QUERY_REPOS_BY_EXTERNAL_SERVICE,
        {"esID": external_service_id},
        connection_path=("repositories",),
        page_size=shared_sourcegraph.DEFAULT_PAGE_SIZE,
    )
    for repository_node in node_stream:
        # `cast` only informs the type checker; the node is passed through unchanged.
        repositories.append(cast(permission_types.Repository, repository_node))
    return repositories
43
+
44
+
45
def get_user_by_username(client: src.SourcegraphClient, username: str) -> shared_types.User | None:
    """Look up a Sourcegraph user by `username`.

    Returns the `user` field of the GraphQL response, or None when that field
    is absent or null.
    """
    variables = cast(src.JSONDict, {"username": username})
    response = cast(dict[str, Any], client.graphql(queries.QUERY_USER_BY_USERNAME, variables))
    return cast(shared_types.User | None, response.get("user"))
52
+
53
+
54
def get_user_by_email(client: src.SourcegraphClient, email: str) -> shared_types.User | None:
    """Look up a Sourcegraph user by `email` via `QUERY_USER_BY_EMAIL`.

    Returns the `user` field of the GraphQL response, or None when that field
    is absent or null.
    NOTE(review): whether only verified addresses match is decided by the
    query/server, which is not visible here.
    """
    variables = cast(src.JSONDict, {"email": email})
    response = cast(dict[str, Any], client.graphql(queries.QUERY_USER_BY_EMAIL, variables))
    return cast(shared_types.User | None, response.get("user"))
61
+
62
+
63
def get_user_by_id(client: src.SourcegraphClient, user_id: str) -> shared_types.User | None:
    """Fetch a user by GraphQL ID.

    Returns the `node` field of the GraphQL response, or None when that field
    is absent or null.
    """
    variables = cast(src.JSONDict, {"id": user_id})
    response = cast(dict[str, Any], client.graphql(queries.QUERY_USER_BY_ID, variables))
    return cast(shared_types.User | None, response.get("node"))
70
+
71
+
72
def list_site_user_candidates(
    client: src.SourcegraphClient,
    created_after: str | None,
) -> list[shared_types.SiteUserCandidate]:
    """Collect `site.users` nodes page by page using limit/offset pagination.

    When `created_after` is given it is sent as `createdAt: {"gte": created_after}`;
    otherwise `createdAt` is sent as null. Paging stops when a page comes back
    empty or when the number of collected nodes reaches the reported `totalCount`.
    """
    created_filter = None if created_after is None else {"gte": created_after}
    collected: list[shared_types.SiteUserCandidate] = []
    while True:
        variables = cast(
            src.JSONDict,
            {
                "limit": shared_sourcegraph.DEFAULT_PAGE_SIZE,
                # The next offset is always the number of nodes gathered so far.
                "offset": len(collected),
                "createdAt": created_filter,
            },
        )
        response = cast(dict[str, Any], client.graphql(queries.QUERY_SITE_USERS, variables))
        users_connection = cast(dict[str, Any], response["site"]["users"])
        total_count = int(cast(float, users_connection["totalCount"]))
        page_nodes = cast(list[shared_types.SiteUserCandidate], users_connection["nodes"])
        collected.extend(page_nodes)
        # An empty page guards against looping forever if totalCount is never reached.
        exhausted = not page_nodes or len(collected) >= total_count
        if exhausted:
            return collected
102
+
103
+
104
def user_has_explicit_repos(client: src.SourcegraphClient, user_id: str) -> bool:
    """Report whether `QUERY_USER_EXPLICIT_REPO_EXISTS` returns any repository node.

    Returns False when the response has no `node` or the node has no
    `permissionsInfo`; otherwise True exactly when
    `permissionsInfo.repositories.nodes` is non-empty.
    """
    variables = cast(src.JSONDict, {"id": user_id})
    response = cast(
        dict[str, Any],
        client.graphql(queries.QUERY_USER_EXPLICIT_REPO_EXISTS, variables),
    )
    user_node = cast(dict[str, Any] | None, response.get("node"))
    permissions_info = (
        None
        if user_node is None
        else cast(dict[str, Any] | None, user_node.get("permissionsInfo"))
    )
    if permissions_info is None:
        return False
    repositories = cast(dict[str, Any], permissions_info["repositories"])
    repository_nodes = src.json_list(repositories.get("nodes"))
    return bool(repository_nodes)
121
+
122
+
123
def list_user_explicit_repos(
    client: src.SourcegraphClient, user_id: str
) -> list[permission_types.Repository]:
    """Return `{id, name}` repository objects for the explicit grants of `user_id`.

    The repository IDs come from `list_user_explicit_repo_ids` and are then
    hydrated with names. An ID whose name cannot be fetched is returned as a
    placeholder entry rather than dropped, so the result has one entry per ID.
    """
    explicit_repository_ids = list_user_explicit_repo_ids(client, user_id)
    return _repositories_from_ids(client, explicit_repository_ids)
133
+
134
+
135
def list_user_explicit_repo_ids(client: src.SourcegraphClient, user_id: str) -> list[str]:
    """Return the repository IDs found under `node.permissionsInfo.repositories`.

    Nodes are streamed from `QUERY_USER_EXPLICIT_REPOS`; nodes whose `id` is
    not a string are skipped.
    """
    node_stream = client.stream_connection_nodes(
        queries.QUERY_USER_EXPLICIT_REPOS,
        {"id": user_id},
        connection_path=("node", "permissionsInfo", "repositories"),
        page_size=shared_sourcegraph.DEFAULT_PAGE_SIZE,
    )
    candidate_ids = (_permission_node_repository_id(node) for node in node_stream)
    return [repository_id for repository_id in candidate_ids if repository_id is not None]
148
+
149
+
150
def list_users_explicit_repos(
    client: src.SourcegraphClient,
    user_ids: Sequence[str],
    *,
    batch_size: int,
) -> dict[str, list[permission_types.Repository]]:
    """Return explicit repository grants for many users, keyed by user ID.

    Repository IDs are gathered first with batched, aliased GraphQL queries
    (`batch_size` users per request) and then hydrated into `{id, name}` objects.
    """
    repository_ids_by_user_id = list_users_explicit_repo_ids(
        client, user_ids, batch_size=batch_size
    )
    return _repositories_by_user_id(client, repository_ids_by_user_id)
161
+
162
+
163
def list_users_explicit_repo_ids(
    client: src.SourcegraphClient,
    user_ids: Sequence[str],
    *,
    batch_size: int,
) -> dict[str, list[str]]:
    """Return explicit API repository IDs for many users using GraphQL aliases.

    Each request carries up to `batch_size` aliased `node(id: ...)` lookups, one
    per (user, cursor) pair. A user whose connection reports `hasNextPage` is
    re-queued with the returned `endCursor` until all pages are consumed.

    Args:
        client: Sourcegraph client supplying endpoint, token and HTTP transport.
        user_ids: GraphQL user IDs. Duplicates are ignored, so each user is
            fetched once.
        batch_size: Maximum number of aliased user lookups per request.

    Returns:
        A dict with one entry per distinct user ID. Users for which no
        repositories connection comes back keep an empty list.

    Raises:
        ValueError: If `batch_size` is less than 1.
        src.GraphQLError: If a connection lacks a boolean `pageInfo.hasNextPage`,
            lacks `endCursor` while more pages are announced, or returns the
            same cursor it was queried with.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    # De-duplicate in first-seen order. The result dict holds one list per user,
    # so a repeated ID would otherwise be queued once per occurrence and its
    # repository IDs appended to that single list several times.
    unique_user_ids = list(dict.fromkeys(user_ids))
    repository_ids_by_user_id: dict[str, list[str]] = {
        user_id: [] for user_id in unique_user_ids
    }
    # Work queue of (user_id, cursor); a cursor of None requests the first page.
    pending_pages: list[tuple[str, str | None]] = [
        (user_id, None) for user_id in unique_user_ids
    ]
    graphql_client = _graphql_client_without_auto_pagination(client)
    while pending_pages:
        batch = pending_pages[:batch_size]
        del pending_pages[:batch_size]
        data = graphql_client.execute(
            _user_explicit_repos_batch_query(len(batch)),
            _user_explicit_repos_batch_variables(batch),
            follow_pages=False,
        )
        for index, (user_id, previous_cursor) in enumerate(batch):
            connection = _user_explicit_repos_connection(data, index)
            if connection is None:
                # No usable connection for this alias: leave the list as is.
                continue
            repository_ids_by_user_id[user_id].extend(_connection_repository_ids(connection))
            page_info = src.json_dict(connection.get("pageInfo"))
            has_next_page = page_info.get("hasNextPage")
            if not isinstance(has_next_page, bool):
                raise src.GraphQLError(
                    f"UserExplicitReposBatch user{index} missing pageInfo.hasNextPage"
                )
            if has_next_page:
                next_cursor = src.json_str(page_info, "endCursor")
                if not next_cursor:
                    raise src.GraphQLError(
                        f"UserExplicitReposBatch user{index} missing pageInfo.endCursor"
                    )
                # A cursor that does not advance would re-queue the same page forever.
                if next_cursor == previous_cursor:
                    raise src.GraphQLError(
                        f"UserExplicitReposBatch user{index} cursor stalled at {next_cursor!r}"
                    )
                pending_pages.append((user_id, next_cursor))
    return repository_ids_by_user_id
207
+
208
+
209
def list_repositories_by_ids(
    client: src.SourcegraphClient,
    repository_ids: Iterable[str],
    *,
    batch_size: int = shared_sourcegraph.DEFAULT_PAGE_SIZE,
) -> dict[str, permission_types.Repository]:
    """Fetch `{id, name}` for each distinct repository ID, keyed by the requested ID.

    IDs are de-duplicated in first-seen order and looked up `batch_size` at a
    time with one aliased `node` lookup per ID. IDs whose lookup does not yield
    a string `id` and a string `name` are left out of the result.

    Raises:
        ValueError: If `batch_size` is less than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    hydrated: dict[str, permission_types.Repository] = {}
    deduplicated_ids = list(dict.fromkeys(repository_ids))
    for batch_start in range(0, len(deduplicated_ids), batch_size):
        id_batch = deduplicated_ids[batch_start : batch_start + batch_size]
        response = cast(
            dict[str, Any],
            client.graphql(
                _repositories_by_id_query(len(id_batch)),
                _repositories_by_id_variables(id_batch),
            ),
        )
        for alias_index, requested_id in enumerate(id_batch):
            node = src.json_dict(response.get(f"repo{alias_index}"))
            node_id = node.get("id")
            node_name = node.get("name")
            if not (isinstance(node_id, str) and isinstance(node_name, str)):
                continue
            # Keyed by the ID the caller asked for; the stored `id` is the returned one.
            hydrated[requested_id] = {"id": node_id, "name": node_name}
    return hydrated
239
+
240
+
241
def _graphql_client_without_auto_pagination(client: src.SourcegraphClient) -> src.GraphQLClient:
    """Build a bare `src.GraphQLClient` from the endpoint, token and HTTP of `client`."""
    graphql_url = f"{client.endpoint}/.api/graphql"
    auth_headers = {"Authorization": f"token {client.token}"}
    return src.GraphQLClient(
        url=graphql_url,
        headers=auth_headers,
        label="Sourcegraph",
        http=client.http,
    )
248
+
249
+
250
+ def _batches(values: Sequence[str], batch_size: int) -> Iterator[Sequence[str]]:
251
+ for start_index in range(0, len(values), batch_size):
252
+ yield values[start_index : start_index + batch_size]
253
+
254
+
255
+ def _user_explicit_repos_batch_query(batch_size: int) -> str:
256
+ variables = ["$first: Int!"]
257
+ fields: list[str] = []
258
+ for index in range(batch_size):
259
+ variables.extend((f"$user{index}: ID!", f"$after{index}: String"))
260
+ fields.append(
261
+ f"""
262
+ user{index}: node(id: $user{index}) {{
263
+ ... on User {{
264
+ permissionsInfo {{
265
+ repositories(source: API, first: $first, after: $after{index}) {{
266
+ nodes {{
267
+ id
268
+ }}
269
+ pageInfo {{ hasNextPage endCursor }}
270
+ }}
271
+ }}
272
+ }}
273
+ }}"""
274
+ )
275
+ return "query UserExplicitReposBatch(" + ", ".join(variables) + ") {" + "".join(fields) + "\n}"
276
+
277
+
278
def _user_explicit_repos_batch_variables(
    batch: Sequence[tuple[str, str | None]],
) -> src.JSONDict:
    """Build the variables for a `UserExplicitReposBatch` query.

    `first` is the shared default page size; each `(user_id, cursor)` pair at
    position `i` becomes `user{i}` and `after{i}`.
    """
    variables: src.JSONDict = {"first": shared_sourcegraph.DEFAULT_PAGE_SIZE}
    for alias_index, page_request in enumerate(batch):
        requested_user_id, after_cursor = page_request
        variables.update(
            {
                f"user{alias_index}": requested_user_id,
                f"after{alias_index}": after_cursor,
            }
        )
    return variables
286
+
287
+
288
def _user_explicit_repos_connection(data: src.JSONDict, index: int) -> src.JSONDict | None:
    """Return `user{index}.permissionsInfo.repositories` from a batch response.

    Each level is read through `src.json_dict`; a falsy (e.g. empty) final
    connection is reported as None.
    """
    user_node = src.json_dict(data.get(f"user{index}"))
    repositories_connection = src.json_dict(
        src.json_dict(user_node.get("permissionsInfo")).get("repositories")
    )
    if repositories_connection:
        return repositories_connection
    return None
293
+
294
+
295
def _connection_repository_ids(connection: src.JSONDict) -> list[str]:
    """Return the string `id` of every entry in `connection["nodes"]`, in order.

    Entries without a string `id` are skipped.
    """
    candidate_ids = (
        _permission_node_repository_id(src.json_dict(raw_node))
        for raw_node in src.json_list(connection.get("nodes"))
    )
    return [repository_id for repository_id in candidate_ids if repository_id is not None]
303
+
304
+
305
+ def _permission_node_repository_id(permission_node: src.JSONDict) -> str | None:
306
+ repository_id = permission_node.get("id")
307
+ return repository_id if isinstance(repository_id, str) else None
308
+
309
+
310
def _repositories_from_ids(
    client: src.SourcegraphClient,
    repository_ids: Sequence[str],
) -> list[permission_types.Repository]:
    """Turn repository IDs into `{id, name}` objects, preserving order and length.

    IDs that cannot be hydrated are represented by placeholder entries.
    """
    hydrated_by_id = list_repositories_by_ids(client, repository_ids)
    repositories: list[permission_types.Repository] = []
    for repository_id in repository_ids:
        repositories.append(_repository_or_placeholder(hydrated_by_id, repository_id))
    return repositories
319
+
320
+
321
def _repositories_by_user_id(
    client: src.SourcegraphClient,
    repository_ids_by_user_id: dict[str, list[str]],
) -> dict[str, list[permission_types.Repository]]:
    """Hydrate per-user repository ID lists into per-user `{id, name}` lists.

    All distinct IDs across users are fetched in one pass. IDs that cannot be
    hydrated get placeholder entries, and a single warning reports how many.
    """
    # A dict used as an ordered set: first-seen order is kept, duplicates collapse.
    seen_repository_ids: dict[str, None] = {}
    for user_repository_ids in repository_ids_by_user_id.values():
        seen_repository_ids.update(dict.fromkeys(user_repository_ids))
    unique_repository_ids = list(seen_repository_ids)

    hydrated_by_id = list_repositories_by_ids(client, unique_repository_ids)
    unhydrated_count = sum(
        1 for repository_id in unique_repository_ids if repository_id not in hydrated_by_id
    )
    if unhydrated_count:
        log.warning(
            "Could not hydrate names for %d repository ID(s); using ID placeholders.",
            unhydrated_count,
        )

    repositories_by_user_id: dict[str, list[permission_types.Repository]] = {}
    for user_id, user_repository_ids in repository_ids_by_user_id.items():
        repositories_by_user_id[user_id] = [
            _repository_or_placeholder(hydrated_by_id, repository_id)
            for repository_id in user_repository_ids
        ]
    return repositories_by_user_id
346
+
347
+
348
+ def _repository_or_placeholder(
349
+ repositories_by_id: dict[str, permission_types.Repository],
350
+ repository_id: str,
351
+ ) -> permission_types.Repository:
352
+ repository = repositories_by_id.get(repository_id)
353
+ if repository is not None:
354
+ return repository
355
+ return _missing_repository(repository_id)
356
+
357
+
358
def _missing_repository(repository_id: str) -> permission_types.Repository:
    """Build a placeholder `{id, name}` entry for a repository whose name is unknown.

    The name is `<repository id=...>`, showing the decoded ID when
    `id_codec.decode_repository_id` succeeds and the raw ID when it raises
    ValueError.
    """
    try:
        decoded_repository_id = id_codec.decode_repository_id(repository_id)
    except ValueError:
        repository_name = f"<repository id={repository_id}>"
    else:
        repository_name = f"<repository id={decoded_repository_id}>"
    return {"id": repository_id, "name": repository_name}
365
+
366
+
367
+ def _repositories_by_id_query(batch_size: int) -> str:
368
+ variables = [f"$repo{index}: ID!" for index in range(batch_size)]
369
+ fields = [
370
+ f"""
371
+ repo{index}: node(id: $repo{index}) {{
372
+ ... on Repository {{
373
+ id
374
+ name
375
+ }}
376
+ }}"""
377
+ for index in range(batch_size)
378
+ ]
379
+ return "query RepositoryNamesByID(" + ", ".join(variables) + ") {" + "".join(fields) + "\n}"
380
+
381
+
382
def _repositories_by_id_variables(repository_ids: Sequence[str]) -> src.JSONDict:
    """Map each repository ID at position `i` to the query variable `repo{i}`."""
    variables: dict[str, str] = {}
    for alias_index, repository_id in enumerate(repository_ids):
        variables[f"repo{alias_index}"] = repository_id
    return cast(src.JSONDict, variables)
387
+
388
+
389
def list_pending_bind_ids(client: src.SourcegraphClient) -> list[str]:
    """Return the `usersWithPendingPermissions` list from `QUERY_PENDING_BINDIDS`."""
    response = cast(dict[str, Any], client.graphql(queries.QUERY_PENDING_BINDIDS))
    pending_bind_ids = response["usersWithPendingPermissions"]
    return cast(list[str], pending_bind_ids)
@@ -0,0 +1,116 @@
1
+ """TypedDict shapes for repo-permission sync."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+ from typing import Any, Literal, NotRequired, TypeAlias, TypedDict
7
+
8
+ from ..shared import types as shared_types
9
+
10
+ SetCommandMode: TypeAlias = Literal[
11
+ "full",
12
+ "user",
13
+ "users_without_explicit_perms",
14
+ ]
15
+
16
+
17
@dataclass(frozen=True)
class SetCommandOptions:
    """Immutable bundle of the mode chosen for `--set` and its optional arguments."""

    # One of the `SetCommandMode` literals.
    mode: SetCommandMode
    # Optional user identifier; presumably used by the "user" mode (confirm at call site).
    user_identifier: str | None = None
    # Optional creation-time lower bound, kept as a string.
    user_created_after: str | None = None
24
+
25
+
26
class UserRef(TypedDict):
    """Minimal user reference carrying only a username."""

    username: str
28
+
29
+
30
class ExternalService(TypedDict):
    """Shape of one external-service node; all keys are required."""

    # Identity and display.
    id: str
    kind: str
    displayName: str
    url: str
    repoCount: int

    # Timestamps, kept as strings; the sync timestamps may be null.
    createdAt: str
    updatedAt: str
    lastSyncAt: str | None
    nextSyncAt: str | None

    # Nullable status messages.
    lastSyncError: str | None
    warning: str | None

    # Boolean flags.
    unrestricted: bool
    suspended: bool
    hasConnectionCheck: bool
    supportsRepoExclusion: bool

    # Nullable user references.
    creator: UserRef | None
    lastUpdater: UserRef | None

    # Configuration carried as a string.
    config: str
49
+
50
+
51
class Repository(TypedDict):
    """A repository reduced to its `id` and `name`."""

    id: str
    name: str
54
+
55
+
56
+ @dataclass(frozen=True, slots=True)
57
+ class RepositoryUsernameOverwrite:
58
+ """One repo overwrite plan using Sourcegraph usernames as bindIDs."""
59
+
60
+ repository_id: str
61
+ repository_name: str
62
+ usernames: tuple[str, ...]
63
+
64
+
65
class AuthProviderMatcher(TypedDict, total=False):
    """Optional auth-provider discovery fields used to match users.

    Every key is optional (`total=False`).
    """

    type: str
    serviceID: str
    clientID: str
    displayName: str
    configID: str
    samlGroup: str
74
+
75
+
76
class CodeHostConnectionMatcher(TypedDict, total=False):
    """Optional code-host connection discovery fields used to match repos.

    Every key is optional (`total=False`).
    """

    # An int here, whereas `ExternalService.id` is a str.
    # NOTE(review): presumably the decoded numeric ID; confirm against the matcher code.
    id: int
    kind: str
    displayName: str
    url: str
    config: dict[str, Any]
84
+
85
+
86
class UsersFilter(TypedDict, total=False):
    """User-side filter of a mapping rule; the single key is optional."""

    authProvider: AuthProviderMatcher
88
+
89
+
90
class ReposFilter(TypedDict, total=False):
    """Repo-side filter of a mapping rule; both keys are optional."""

    codeHostConnection: CodeHostConnectionMatcher
    regex: str
93
+
94
+
95
+ class MappingRule(TypedDict):
96
+ name: NotRequired[str]
97
+ users: UsersFilter
98
+ repos: ReposFilter
99
+
100
+
101
class ConfigFile(TypedDict, total=False):
    """Top-level shape of the configuration file; every key is optional."""

    authProviders: list[dict[str, Any]]
    codeHostConnections: list[dict[str, Any]]
    maps: list[MappingRule]
105
+
106
+
107
@dataclass(frozen=True)
class MappingContext:
    """Frozen container for the discovery state that permission mapping reads.

    Only attribute rebinding is blocked; the lists and dicts held here remain
    mutable objects.
    """

    # Rules to evaluate.
    mapping_rules: list[MappingRule]
    # Discovered auth providers.
    providers: list[shared_types.AuthProvider]
    # Keyed by a pair of strings.
    # NOTE(review): the meaning of the pair is not visible here; confirm where it is built.
    saml_groups_attribute_names: dict[tuple[str, str], str]
    # External services and their repositories, both keyed by an integer service ID.
    services_by_id: dict[int, ExternalService]
    repos_by_external_service_id: dict[int, list[Repository]]
    # Repositories keyed by a string ID.
    all_repos_by_id: dict[str, Repository]