src-auth-perms-sync 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. src_auth_perms_sync/__init__.py +1 -0
  2. src_auth_perms_sync/__main__.py +6 -0
  3. src_auth_perms_sync/cli.py +646 -0
  4. src_auth_perms_sync/orgs/__init__.py +1 -0
  5. src_auth_perms_sync/orgs/command.py +7 -0
  6. src_auth_perms_sync/orgs/queries.py +44 -0
  7. src_auth_perms_sync/orgs/sync.py +1167 -0
  8. src_auth_perms_sync/orgs/types.py +103 -0
  9. src_auth_perms_sync/permissions/__init__.py +1 -0
  10. src_auth_perms_sync/permissions/apply.py +420 -0
  11. src_auth_perms_sync/permissions/command.py +918 -0
  12. src_auth_perms_sync/permissions/full_set.py +880 -0
  13. src_auth_perms_sync/permissions/mapping.py +627 -0
  14. src_auth_perms_sync/permissions/maps.py +291 -0
  15. src_auth_perms_sync/permissions/queries.py +180 -0
  16. src_auth_perms_sync/permissions/restore.py +913 -0
  17. src_auth_perms_sync/permissions/snapshot.py +1502 -0
  18. src_auth_perms_sync/permissions/sourcegraph.py +392 -0
  19. src_auth_perms_sync/permissions/types.py +116 -0
  20. src_auth_perms_sync/permissions/workflow.py +526 -0
  21. src_auth_perms_sync/shared/__init__.py +1 -0
  22. src_auth_perms_sync/shared/backups.py +119 -0
  23. src_auth_perms_sync/shared/id_codec.py +67 -0
  24. src_auth_perms_sync/shared/queries.py +65 -0
  25. src_auth_perms_sync/shared/run_context.py +34 -0
  26. src_auth_perms_sync/shared/saml_groups.py +267 -0
  27. src_auth_perms_sync/shared/site_config.py +366 -0
  28. src_auth_perms_sync/shared/sourcegraph.py +69 -0
  29. src_auth_perms_sync/shared/types.py +69 -0
  30. src_auth_perms_sync-0.2.1.dist-info/METADATA +256 -0
  31. src_auth_perms_sync-0.2.1.dist-info/RECORD +34 -0
  32. src_auth_perms_sync-0.2.1.dist-info/WHEEL +4 -0
  33. src_auth_perms_sync-0.2.1.dist-info/entry_points.txt +2 -0
  34. src_auth_perms_sync-0.2.1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,67 @@
1
+ """Encode/decode Sourcegraph opaque GraphQL Node IDs for workflows.
2
+
3
+ Sourcegraph follows the [Relay Object Identification spec](
4
+ https://relay.dev/graphql/objectidentification.htm): every node has a
5
+ globally-unique opaque `id` of the form
6
+
7
+ base64(f"{TypeName}:{DatabasePrimaryKey}")
8
+
9
+ e.g. `ExternalService:5` → `RXh0ZXJuYWxTZXJ2aWNlOjU=`.
10
+
11
+ These helpers translate between the opaque GraphQL form (used on the
12
+ wire) and the integer primary key (used in our YAML config and logs).
13
+ The integer form is much friendlier for a human authoring mapping
14
+ rules — base64 strings of internal type names leak abstraction and are
15
+ hard to copy/diff by eye.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import base64
21
+
22
# GraphQL type names that appear before the ":" in a decoded node ID
# (`<TypeName>:<primary key>`); the encode/decode helpers below pass them
# as `type_prefix`.
EXTERNAL_SERVICE_TYPE_PREFIX = "ExternalService"
REPOSITORY_TYPE_PREFIX = "Repository"
24
+
25
+
26
def _encode_node_id(type_prefix: str, db_id: int) -> str:
    """Build the opaque node ID for `db_id` under the given GraphQL type.

    The result is the base64 text of `"<type_prefix>:<db_id>"`.
    """
    payload = f"{type_prefix}:{db_id}"
    encoded = base64.b64encode(payload.encode("utf-8"))
    # base64 output is pure ASCII, so this decode cannot fail.
    return encoded.decode("ascii")
29
+
30
+
31
def _decode_node_id(type_prefix: str, graphql_id: str) -> int:
    """Decode an opaque node ID and return its integer primary key.

    `graphql_id` must be the base64 text of `"<type_prefix>:<int>"`,
    where `<int>` is in canonical decimal form (what `str(int)`
    produces).

    Raises ValueError when `graphql_id` is not valid base64/UTF-8, when
    the decoded text has no `:` or a different type prefix, or when the
    part after the `:` is not a canonical integer.
    """
    try:
        raw = base64.b64decode(graphql_id, validate=True).decode()
    except (ValueError, UnicodeDecodeError) as exception:
        raise ValueError(f"not a valid base64 GraphQL Node ID: {graphql_id!r}") from exception
    prefix, separator, suffix = raw.partition(":")
    if not separator or prefix != type_prefix:
        raise ValueError(f"not a {type_prefix} Node ID: {graphql_id!r} (decoded: {raw!r})")
    try:
        db_id = int(suffix)
    except ValueError as exception:
        raise ValueError(
            f"{type_prefix} Node ID has non-integer suffix: {graphql_id!r} (decoded: {raw!r})"
        ) from exception
    # int() is lenient: it also accepts surrounding whitespace, a leading
    # "+", "_" digit separators, leading zeros and non-ASCII digits. None
    # of those is the `<int>` form the encoder writes, so only accept a
    # suffix that round-trips exactly.
    if str(db_id) != suffix:
        raise ValueError(
            f"{type_prefix} Node ID has non-canonical integer suffix: "
            f"{graphql_id!r} (decoded: {raw!r})"
        )
    return db_id
45
+
46
+
47
def decode_external_service_id(graphql_id: str) -> int:
    """Return the integer DB primary key behind an ExternalService node ID.

    `graphql_id` is the opaque GraphQL form. A ValueError is raised when
    it does not decode to a well-formed `ExternalService:<int>`.
    """
    db_id = _decode_node_id(
        type_prefix=EXTERNAL_SERVICE_TYPE_PREFIX,
        graphql_id=graphql_id,
    )
    return db_id
54
+
55
+
56
def encode_repository_id(db_id: int) -> str:
    """Return the opaque Repository GraphQL Node ID for a DB primary key."""
    node_id = _encode_node_id(type_prefix=REPOSITORY_TYPE_PREFIX, db_id=db_id)
    return node_id
59
+
60
+
61
def decode_repository_id(graphql_id: str) -> int:
    """Return the integer DB primary key behind a Repository node ID.

    `graphql_id` is the opaque GraphQL form. A ValueError is raised when
    it does not decode to a well-formed `Repository:<int>`.
    """
    db_id = _decode_node_id(
        type_prefix=REPOSITORY_TYPE_PREFIX,
        graphql_id=graphql_id,
    )
    return db_id
@@ -0,0 +1,65 @@
1
+ """Shared auth-provider/user GraphQL operations sent to Sourcegraph."""
2
+
3
+ from __future__ import annotations
4
+
5
# Reads `site.permissionsUserMappingBindID` together with the site
# configuration's `effectiveContents`.
QUERY_VALIDATE_PERMISSIONS_CONFIG = """
query ValidatePermissionsConfig {
site {
permissionsUserMappingBindID
configuration {
effectiveContents
}
}
}
"""
15
+
16
# Lists every auth provider on the site. `serviceID`/`clientID` are the
# same pair requested on each external account in QUERY_USERS, and
# `configID` is the field the SAML helpers use to look up site-config
# overrides.
QUERY_AUTH_PROVIDERS = """
query ListAuthProviders {
site {
authProviders {
nodes {
serviceType
serviceID
clientID
displayName
isBuiltin
configID
}
}
}
}
"""
32
+
33
# Requests a page of one user but selects only `totalCount`, so the
# response carries the user count and no user rows.
QUERY_USER_COUNT = """
query CountUsers {
users(first: 1) {
totalCount
}
}
"""
40
+
41
# Cursor-paginated user listing: `$first` is the page size, `$after` the
# cursor taken from the previous page's `pageInfo.endCursor`.
# NOTE(review): external accounts are requested with a fixed `first: 50`
# and no nested pageInfo, so a user with more than 50 external accounts
# would be returned truncated — confirm that is acceptable.
QUERY_USERS = """
query ListUsers($first: Int!, $after: String) {
users(first: $first, after: $after) {
nodes {
id
username
builtinAuth
externalAccounts(first: 50) {
nodes {
serviceType
serviceID
clientID
# accountData is the parsed gosaml2 AssertionInfo JSON for SAML
# accounts (used by saml_groups extraction). The server gates
# it on Site Admin for SAML/OIDC; we already require Site
# Admin. Returns null for serviceType where the resolver does
# not expose data (e.g. plain GitHub OAuth without SSO).
accountData
}
}
}
pageInfo { hasNextPage endCursor }
}
}
"""
@@ -0,0 +1,34 @@
1
+ """Shared per-run state for command workflows."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from collections.abc import Generator
6
+ from concurrent.futures import ThreadPoolExecutor
7
+ from contextlib import contextmanager
8
+ from dataclasses import dataclass
9
+
10
+ from . import types as shared_types
11
+
12
+
13
@dataclass(frozen=True)
class CommandData:
    """Instance data a command loaded and later commands may reuse.

    The dataclass is frozen, so the two attributes cannot be rebound
    after construction; the lists they reference are ordinary mutable
    lists. Both attributes default to `None`.
    """

    # Auth providers discovered on the instance.
    # NOTE(review): `None` presumably means "not loaded yet" — confirm
    # against the callers that populate this.
    auth_providers: list[shared_types.AuthProvider] | None = None
    # Compacted per-user SAML group memberships (the type built by
    # `saml_groups.compact_saml_group_users`); `None` by default.
    saml_group_users: list[shared_types.SamlGroupUser] | None = None
19
+
20
+
21
@contextmanager
def thread_pool(
    parallelism: int,
    worker_pool: ThreadPoolExecutor | None = None,
) -> Generator[ThreadPoolExecutor]:
    """Provide a worker pool for the duration of a `with` block.

    When `worker_pool` is supplied it is yielded as-is and left running
    on exit, because the caller owns it. Otherwise a temporary pool with
    `parallelism` workers is created and shut down when the block exits.
    """
    if worker_pool is None:
        temporary_pool = ThreadPoolExecutor(
            thread_name_prefix="sg-worker",
            max_workers=parallelism,
        )
        # The executor's own context manager performs the shutdown.
        with temporary_pool:
            yield temporary_pool
    else:
        yield worker_pool
@@ -0,0 +1,267 @@
1
+ """Parse shared SAML group memberships out of `ExternalAccount.accountData`.
2
+
3
+ Sourcegraph stores the full gosaml2 `AssertionInfo` JSON as the
4
+ `accountData` blob on each SAML external account (see
5
+ [QUERY_USERS](queries.py)). Group claims live inside the SAML assertion
6
+ attribute named by the provider's `groupsAttributeName` site config
7
+ (default `"groups"`).
8
+
9
+ This module does NOT fetch — it only parses what `list_users_with_accounts`
10
+ already pulled. Two on-disk shapes are handled defensively:
11
+
12
+ 1. Raw `*saml2.AssertionInfo`:
13
+ accountData["Assertions"][i]["AttributeStatement"]["Attributes"][j]
14
+ {"Name": "<attr>", "Values": [{"Value": "..."}, ...]}
15
+
16
+ 2. The flattened `SAMLValues` shape
17
+ (gosaml2 `SAMLValues{Values: map[string]SAMLAttribute}`):
18
+ accountData["Values"]["<attr>"]["Values"][j]["Value"]
19
+
20
+ Either form yields the same flat list of group-name strings.
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+ from collections.abc import Iterable
26
+ from typing import Any, cast
27
+
28
+ from . import types as shared_types
29
+
30
# Assertion attribute read for group names when a provider has no override.
DEFAULT_GROUPS_ATTRIBUTE_NAME: str = "groups"
# `serviceType` value that marks an auth provider / external account as SAML.
SAML_SERVICE_TYPE: str = "saml"

# Per-(serviceID, clientID) override of the SAML groups attribute name.
# A `None` map or a missing key means "use DEFAULT_GROUPS_ATTRIBUTE_NAME
# for this provider". Built by `attribute_names_by_provider_key()` from
# the discovered AuthProvider list joined against the configID-keyed
# overrides we parse out of site config.
SamlGroupsAttributeNameByProvider = dict[tuple[str, str], str]
39
+
40
+
41
def saml_providers_by_account_key(
    providers: Iterable[shared_types.AuthProvider],
) -> dict[tuple[str, str], shared_types.AuthProvider]:
    """Index the SAML providers by their `(serviceID, clientID)` pair.

    Non-SAML providers are left out. The key is the same pair an
    `ExternalAccount` row carries, so accounts can be matched to their
    provider with a single lookup.
    """
    indexed: dict[tuple[str, str], shared_types.AuthProvider] = {}
    for candidate in providers:
        if candidate["serviceType"] != SAML_SERVICE_TYPE:
            continue
        indexed[(candidate["serviceID"], candidate["clientID"])] = candidate
    return indexed
50
+
51
+
52
def attribute_names_by_provider_key(
    providers: list[shared_types.AuthProvider],
    overrides_by_config_id: dict[str, str],
) -> SamlGroupsAttributeNameByProvider:
    """Translate `configID`-keyed overrides into (serviceID, clientID) keys.

    Site config identifies a provider by `configID`, while a user's
    external account only exposes (serviceID, clientID). Doing the join
    once here lets the parsing helpers work from a single map.

    Only SAML providers that actually have an override get an entry.
    """
    return {
        (provider["serviceID"], provider["clientID"]): override
        for provider in providers
        if provider["serviceType"] == SAML_SERVICE_TYPE
        and (override := overrides_by_config_id.get(provider["configID"])) is not None
    }
72
+
73
+
74
def attribute_name_for(
    overrides: SamlGroupsAttributeNameByProvider | None,
    service_id: str,
    client_id: str,
) -> str:
    """Resolve the groups attribute name for one provider.

    Returns the override registered for `(service_id, client_id)`, or
    `DEFAULT_GROUPS_ATTRIBUTE_NAME` when there is no override map or no
    entry for that pair.
    """
    provider_key = (service_id, client_id)
    if overrides is not None and provider_key in overrides:
        return overrides[provider_key]
    return DEFAULT_GROUPS_ATTRIBUTE_NAME
83
+
84
+
85
# Synthetic group name under which `count_users_per_saml_group` tallies
# SAML users that have no group on any of their accounts for a provider.
MISSING_GROUP_NAME: str = "missingGroup"
86
+
87
+
88
def extract_saml_groups(
    account_data: dict[str, Any] | None,
    attribute_name: str = DEFAULT_GROUPS_ATTRIBUTE_NAME,
) -> list[str]:
    """Pull the group-name strings out of one SAML `accountData` blob.

    `attribute_name` selects which assertion attribute holds the groups.

    Returns `[]` for null/empty data, a payload that is not a JSON
    object, a missing attribute, or an unknown shape — never raises.
    Duplicate group names are dropped and first-seen order is kept.
    """
    # The parsing helpers call `.get` on the payload, which would raise
    # AttributeError on a string, list or number. Treat anything that is
    # not a non-empty dict as "no groups" so the never-raises promise holds.
    if not isinstance(account_data, dict) or not account_data:
        return []
    # dict keys keep insertion order, which gives an ordered de-dup.
    return list(dict.fromkeys(_iter_saml_group_values(account_data, attribute_name)))
107
+
108
+
109
def _iter_saml_group_values(account_data: dict[str, Any], attribute_name: str) -> Iterable[str]:
    """Yield group values from both supported shapes, raw assertions first."""
    for parse_shape in (_iter_assertion_group_values, _iter_flat_group_values):
        yield from parse_shape(account_data, attribute_name)
112
+
113
+
114
def _iter_assertion_group_values(
    account_data: dict[str, Any], attribute_name: str
) -> Iterable[str]:
    """Yield group values found in the raw AssertionInfo shape.

    Walks `Assertions[*].AttributeStatement.Attributes[*]` and emits the
    values of every attribute whose `Name` equals `attribute_name`.
    Anything that is not the expected list/dict at a level is skipped.
    """
    for assertion in _dict_items(account_data.get("Assertions")):
        statement = assertion.get("AttributeStatement")
        if isinstance(statement, dict):
            attributes = cast(dict[str, Any], statement).get("Attributes")
            for attribute in _dict_items(attributes):
                if attribute.get("Name") == attribute_name:
                    yield from _iter_attribute_values(attribute)
127
+
128
+
129
def _iter_flat_group_values(account_data: dict[str, Any], attribute_name: str) -> Iterable[str]:
    """Yield group values found in the flattened SAMLValues shape.

    Looks up `Values[attribute_name]` and emits that attribute's values.
    Yields nothing when either level is missing or is not a dict.
    """
    values_by_name = account_data.get("Values")
    if isinstance(values_by_name, dict):
        candidate = cast(dict[str, Any], values_by_name).get(attribute_name)
        if isinstance(candidate, dict):
            yield from _iter_attribute_values(cast(dict[str, Any], candidate))
137
+
138
+
139
def _iter_attribute_values(attribute: dict[str, Any]) -> Iterable[str]:
    """Yield each string `Value` from an attribute's `Values` list.

    Entries that are not dicts, or whose `Value` is not a string, are
    skipped.
    """
    yield from (
        entry["Value"]
        for entry in _dict_items(attribute.get("Values"))
        if isinstance(entry.get("Value"), str)
    )
144
+
145
+
146
def _dict_items(value: Any) -> list[dict[str, Any]]:
    """Return the dict elements of `value`, or `[]` when it is not a list.

    Non-dict elements are dropped; the order of the remaining ones is kept.
    """
    dict_entries: list[dict[str, Any]] = []
    if isinstance(value, list):
        for entry in cast(list[Any], value):
            if isinstance(entry, dict):
                dict_entries.append(cast(dict[str, Any], entry))
    return dict_entries
151
+
152
+
153
def compact_saml_group_user(
    user: shared_types.User,
    providers_by_account_key: dict[tuple[str, str], shared_types.AuthProvider],
    attribute_names_by_provider: SamlGroupsAttributeNameByProvider,
) -> shared_types.SamlGroupUser | None:
    """Reduce one full user row to its SAML group memberships.

    Only SAML accounts whose `(serviceID, clientID)` is present in
    `providers_by_account_key` contribute. Each `(configID, group)` pair
    is recorded once, in first-seen order. Returns `None` when the user
    ends up with no memberships.
    """
    # Keyed by (configID, group name); insertion order is the output order.
    memberships_by_key: dict[tuple[str, str], shared_types.SamlGroupMembership] = {}
    for account in user["externalAccounts"]["nodes"]:
        if account["serviceType"] != SAML_SERVICE_TYPE:
            continue
        service_id, client_id = account["serviceID"], account["clientID"]
        provider = providers_by_account_key.get((service_id, client_id))
        if provider is None:
            continue
        attribute_name = attribute_name_for(attribute_names_by_provider, service_id, client_id)
        for group_name in extract_saml_groups(account.get("accountData"), attribute_name):
            membership_key = (provider["configID"], group_name)
            if membership_key not in memberships_by_key:
                memberships_by_key[membership_key] = shared_types.SamlGroupMembership(
                    provider_config_id=provider["configID"],
                    group_name=group_name,
                )
    if memberships_by_key:
        return shared_types.SamlGroupUser(
            user_id=user["id"],
            username=user["username"],
            saml_group_memberships=tuple(memberships_by_key.values()),
        )
    return None
190
+
191
+
192
def compact_saml_group_users(
    users: Iterable[shared_types.User],
    providers: Iterable[shared_types.AuthProvider],
    attribute_names_by_provider: SamlGroupsAttributeNameByProvider,
) -> list[shared_types.SamlGroupUser]:
    """Compact every user and keep only those with SAML group memberships.

    The provider index is built once and shared across all users. Input
    order is preserved.
    """
    providers_by_account_key = saml_providers_by_account_key(providers)
    compacted = (
        compact_saml_group_user(user, providers_by_account_key, attribute_names_by_provider)
        for user in users
    )
    return [entry for entry in compacted if entry is not None]
207
+
208
+
209
def count_users_per_saml_group(
    users: Iterable[shared_types.User],
    attribute_names_by_provider: SamlGroupsAttributeNameByProvider | None = None,
) -> dict[tuple[str, str], dict[str, int]]:
    """Count distinct users per group for each SAML provider.

    The outer key is the provider's `(serviceID, clientID)` pair, as it
    appears on `AuthProvider`/`ExternalAccount`; the inner dict maps a
    group name to the number of users in it.

    `attribute_names_by_provider` is the override map produced by
    `attribute_names_by_provider_key()`. Providers without an entry, and
    all providers when it is `None`, use `DEFAULT_GROUPS_ATTRIBUTE_NAME`.

    A user counts at most once per (provider, group), however many of
    their accounts list that group. Groups no user belongs to never
    appear.

    Users who have a SAML account on a provider but no group on any of
    their accounts for it are counted under `MISSING_GROUP_NAME`. One
    account with groups is enough to keep a user out of that bucket.
    """
    users_by_provider: dict[tuple[str, str], set[str]] = {}
    users_by_group: dict[tuple[str, str], dict[str, set[str]]] = {}
    for user in users:
        for account in user["externalAccounts"]["nodes"]:
            if account["serviceType"] == SAML_SERVICE_TYPE:
                service_id, client_id = account["serviceID"], account["clientID"]
                provider_key = (service_id, client_id)
                users_by_provider.setdefault(provider_key, set()).add(user["id"])
                group_names = extract_saml_groups(
                    account.get("accountData"),
                    attribute_name_for(attribute_names_by_provider, service_id, client_id),
                )
                if group_names:
                    members_by_group = users_by_group.setdefault(provider_key, {})
                    for group_name in group_names:
                        members_by_group.setdefault(group_name, set()).add(user["id"])

    tallies: dict[tuple[str, str], dict[str, int]] = {}
    for provider_key, member_ids in users_by_provider.items():
        members_by_group = users_by_group.get(provider_key, {})
        group_counts = {name: len(ids) for name, ids in members_by_group.items()}
        # Every user with groups sits in at least one group set, so the
        # union of those sets is exactly the users that have groups.
        ungrouped = member_ids - set().union(*members_by_group.values())
        if ungrouped:
            group_counts[MISSING_GROUP_NAME] = len(ungrouped)
        if group_counts:
            tallies[provider_key] = group_counts
    return tallies