src-auth-perms-sync 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- src_auth_perms_sync/__init__.py +1 -0
- src_auth_perms_sync/__main__.py +6 -0
- src_auth_perms_sync/cli.py +646 -0
- src_auth_perms_sync/orgs/__init__.py +1 -0
- src_auth_perms_sync/orgs/command.py +7 -0
- src_auth_perms_sync/orgs/queries.py +44 -0
- src_auth_perms_sync/orgs/sync.py +1167 -0
- src_auth_perms_sync/orgs/types.py +103 -0
- src_auth_perms_sync/permissions/__init__.py +1 -0
- src_auth_perms_sync/permissions/apply.py +420 -0
- src_auth_perms_sync/permissions/command.py +918 -0
- src_auth_perms_sync/permissions/full_set.py +880 -0
- src_auth_perms_sync/permissions/mapping.py +627 -0
- src_auth_perms_sync/permissions/maps.py +291 -0
- src_auth_perms_sync/permissions/queries.py +180 -0
- src_auth_perms_sync/permissions/restore.py +913 -0
- src_auth_perms_sync/permissions/snapshot.py +1502 -0
- src_auth_perms_sync/permissions/sourcegraph.py +392 -0
- src_auth_perms_sync/permissions/types.py +116 -0
- src_auth_perms_sync/permissions/workflow.py +526 -0
- src_auth_perms_sync/shared/__init__.py +1 -0
- src_auth_perms_sync/shared/backups.py +119 -0
- src_auth_perms_sync/shared/id_codec.py +67 -0
- src_auth_perms_sync/shared/queries.py +65 -0
- src_auth_perms_sync/shared/run_context.py +34 -0
- src_auth_perms_sync/shared/saml_groups.py +267 -0
- src_auth_perms_sync/shared/site_config.py +366 -0
- src_auth_perms_sync/shared/sourcegraph.py +69 -0
- src_auth_perms_sync/shared/types.py +69 -0
- src_auth_perms_sync-0.2.1.dist-info/METADATA +256 -0
- src_auth_perms_sync-0.2.1.dist-info/RECORD +34 -0
- src_auth_perms_sync-0.2.1.dist-info/WHEEL +4 -0
- src_auth_perms_sync-0.2.1.dist-info/entry_points.txt +2 -0
- src_auth_perms_sync-0.2.1.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
"""Encode/decode Sourcegraph opaque GraphQL Node IDs for workflows.
|
|
2
|
+
|
|
3
|
+
Sourcegraph follows the [Relay Object Identification spec](
|
|
4
|
+
https://relay.dev/graphql/objectidentification.htm): every node has a
|
|
5
|
+
globally-unique opaque `id` of the form
|
|
6
|
+
|
|
7
|
+
base64(f"{TypeName}:{DatabasePrimaryKey}")
|
|
8
|
+
|
|
9
|
+
e.g. `ExternalService:5` → `RXh0ZXJuYWxTZXJ2aWNlOjU=`.
|
|
10
|
+
|
|
11
|
+
These helpers translate between the opaque GraphQL form (used on the
|
|
12
|
+
wire) and the integer primary key (used in our YAML config and logs).
|
|
13
|
+
The integer form is much friendlier for a human authoring mapping
|
|
14
|
+
rules — base64 strings of internal type names leak abstraction and are
|
|
15
|
+
hard to copy/diff by eye.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import base64
|
|
21
|
+
|
|
22
|
+
EXTERNAL_SERVICE_TYPE_PREFIX = "ExternalService"
|
|
23
|
+
REPOSITORY_TYPE_PREFIX = "Repository"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _encode_node_id(type_prefix: str, db_id: int) -> str:
|
|
27
|
+
raw = f"{type_prefix}:{db_id}".encode()
|
|
28
|
+
return base64.b64encode(raw).decode()
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _decode_node_id(type_prefix: str, graphql_id: str) -> int:
|
|
32
|
+
try:
|
|
33
|
+
raw = base64.b64decode(graphql_id, validate=True).decode()
|
|
34
|
+
except (ValueError, UnicodeDecodeError) as exception:
|
|
35
|
+
raise ValueError(f"not a valid base64 GraphQL Node ID: {graphql_id!r}") from exception
|
|
36
|
+
prefix, separator, suffix = raw.partition(":")
|
|
37
|
+
if not separator or prefix != type_prefix:
|
|
38
|
+
raise ValueError(f"not a {type_prefix} Node ID: {graphql_id!r} (decoded: {raw!r})")
|
|
39
|
+
try:
|
|
40
|
+
return int(suffix)
|
|
41
|
+
except ValueError as exception:
|
|
42
|
+
raise ValueError(
|
|
43
|
+
f"{type_prefix} Node ID has non-integer suffix: {graphql_id!r} (decoded: {raw!r})"
|
|
44
|
+
) from exception
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def decode_external_service_id(graphql_id: str) -> int:
    """Turn an opaque ExternalService GraphQL Node ID into its integer key.

    A ValueError is raised when `graphql_id` does not decode to a
    well-formed `ExternalService:<int>` node ID.
    """
    database_id = _decode_node_id(
        type_prefix=EXTERNAL_SERVICE_TYPE_PREFIX,
        graphql_id=graphql_id,
    )
    return database_id
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def encode_repository_id(db_id: int) -> str:
    """Turn an integer repository primary key into its opaque GraphQL Node ID."""
    node_id = _encode_node_id(type_prefix=REPOSITORY_TYPE_PREFIX, db_id=db_id)
    return node_id
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def decode_repository_id(graphql_id: str) -> int:
    """Turn an opaque Repository GraphQL Node ID into its integer key.

    A ValueError is raised when `graphql_id` does not decode to a
    well-formed `Repository:<int>` node ID.
    """
    database_id = _decode_node_id(
        type_prefix=REPOSITORY_TYPE_PREFIX,
        graphql_id=graphql_id,
    )
    return database_id
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
"""Shared auth-provider/user GraphQL operations sent to Sourcegraph."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
QUERY_VALIDATE_PERMISSIONS_CONFIG = """
|
|
6
|
+
query ValidatePermissionsConfig {
|
|
7
|
+
site {
|
|
8
|
+
permissionsUserMappingBindID
|
|
9
|
+
configuration {
|
|
10
|
+
effectiveContents
|
|
11
|
+
}
|
|
12
|
+
}
|
|
13
|
+
}
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
QUERY_AUTH_PROVIDERS = """
|
|
17
|
+
query ListAuthProviders {
|
|
18
|
+
site {
|
|
19
|
+
authProviders {
|
|
20
|
+
nodes {
|
|
21
|
+
serviceType
|
|
22
|
+
serviceID
|
|
23
|
+
clientID
|
|
24
|
+
displayName
|
|
25
|
+
isBuiltin
|
|
26
|
+
configID
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
}
|
|
30
|
+
}
|
|
31
|
+
"""
|
|
32
|
+
|
|
33
|
+
QUERY_USER_COUNT = """
|
|
34
|
+
query CountUsers {
|
|
35
|
+
users(first: 1) {
|
|
36
|
+
totalCount
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
"""
|
|
40
|
+
|
|
41
|
+
QUERY_USERS = """
|
|
42
|
+
query ListUsers($first: Int!, $after: String) {
|
|
43
|
+
users(first: $first, after: $after) {
|
|
44
|
+
nodes {
|
|
45
|
+
id
|
|
46
|
+
username
|
|
47
|
+
builtinAuth
|
|
48
|
+
externalAccounts(first: 50) {
|
|
49
|
+
nodes {
|
|
50
|
+
serviceType
|
|
51
|
+
serviceID
|
|
52
|
+
clientID
|
|
53
|
+
# accountData is the parsed gosaml2 AssertionInfo JSON for SAML
|
|
54
|
+
# accounts (used by saml_groups extraction). The server gates
|
|
55
|
+
# it on Site Admin for SAML/OIDC; we already require Site
|
|
56
|
+
# Admin. Returns null for serviceType where the resolver does
|
|
57
|
+
# not expose data (e.g. plain GitHub OAuth without SSO).
|
|
58
|
+
accountData
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
pageInfo { hasNextPage endCursor }
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
"""
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
"""Shared per-run state for command workflows."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from collections.abc import Generator
|
|
6
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
7
|
+
from contextlib import contextmanager
|
|
8
|
+
from dataclasses import dataclass
|
|
9
|
+
|
|
10
|
+
from . import types as shared_types
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass(frozen=True)
class CommandData:
    """Immutable bundle of instance data loaded by one command.

    Later commands may reuse it instead of loading the same data again.
    Each field defaults to `None`.
    """

    # Auth providers, or `None` (the default).
    auth_providers: list[shared_types.AuthProvider] | None = None
    # SAML group users, or `None` (the default).
    saml_group_users: list[shared_types.SamlGroupUser] | None = None
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@contextmanager
def thread_pool(
    parallelism: int,
    worker_pool: ThreadPoolExecutor | None = None,
) -> Generator[ThreadPoolExecutor]:
    """Provide a worker pool for the duration of a `with` block.

    When `worker_pool` is supplied it is yielded as-is and left running
    afterwards. Otherwise a temporary `ThreadPoolExecutor` with
    `parallelism` workers is created, yielded, and shut down when the
    block exits.
    """
    if worker_pool is None:
        temporary_pool = ThreadPoolExecutor(
            max_workers=parallelism, thread_name_prefix="sg-worker"
        )
        with temporary_pool:
            yield temporary_pool
    else:
        yield worker_pool
|
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
"""Parse shared SAML group memberships out of `ExternalAccount.accountData`.
|
|
2
|
+
|
|
3
|
+
Sourcegraph stores the full gosaml2 `AssertionInfo` JSON as the
|
|
4
|
+
`accountData` blob on each SAML external account (see
|
|
5
|
+
[QUERY_USERS](queries.py)). Group claims live inside the SAML assertion
|
|
6
|
+
attribute named by the provider's `groupsAttributeName` site config
|
|
7
|
+
(default `"groups"`).
|
|
8
|
+
|
|
9
|
+
This module does NOT fetch — it only parses what `list_users_with_accounts`
|
|
10
|
+
already pulled. Two on-disk shapes are handled defensively:
|
|
11
|
+
|
|
12
|
+
1. Raw `*saml2.AssertionInfo`:
|
|
13
|
+
accountData["Assertions"][i]["AttributeStatement"]["Attributes"][j]
|
|
14
|
+
{"Name": "<attr>", "Values": [{"Value": "..."}, ...]}
|
|
15
|
+
|
|
16
|
+
2. The flattened `SAMLValues` shape:
|
|
17
|
+
(`gosaml2 SAMLValues{Values: map[string]SAMLAttribute}`):
|
|
18
|
+
accountData["Values"]["<attr>"]["Values"][j]["Value"]
|
|
19
|
+
|
|
20
|
+
Either form yields the same flat list of group-name strings.
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
from __future__ import annotations
|
|
24
|
+
|
|
25
|
+
from collections.abc import Iterable
|
|
26
|
+
from typing import Any, cast
|
|
27
|
+
|
|
28
|
+
from . import types as shared_types
|
|
29
|
+
|
|
30
|
+
DEFAULT_GROUPS_ATTRIBUTE_NAME: str = "groups"
|
|
31
|
+
SAML_SERVICE_TYPE: str = "saml"
|
|
32
|
+
|
|
33
|
+
# Per-(serviceID, clientID) override of the SAML groups attribute name.
|
|
34
|
+
# A `None` map or a missing key means "use DEFAULT_GROUPS_ATTRIBUTE_NAME for
|
|
35
|
+
# this provider". Built by `attribute_names_by_provider_key()` from the
|
|
36
|
+
# discovered AuthProvider list joined against the configID-keyed
|
|
37
|
+
# overrides we parse out of site config.
|
|
38
|
+
SamlGroupsAttributeNameByProvider = dict[tuple[str, str], str]
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def saml_providers_by_account_key(
    providers: Iterable[shared_types.AuthProvider],
) -> dict[tuple[str, str], shared_types.AuthProvider]:
    """Index SAML auth providers by their `(serviceID, clientID)` pair.

    That pair is the same key an `ExternalAccount` row carries. Providers
    whose `serviceType` is not SAML are left out; when two providers
    share a pair, the later one replaces the earlier.
    """
    indexed: dict[tuple[str, str], shared_types.AuthProvider] = {}
    for candidate in providers:
        if candidate["serviceType"] != SAML_SERVICE_TYPE:
            continue
        indexed[candidate["serviceID"], candidate["clientID"]] = candidate
    return indexed
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def attribute_names_by_provider_key(
    providers: list[shared_types.AuthProvider],
    overrides_by_config_id: dict[str, str],
) -> SamlGroupsAttributeNameByProvider:
    """Translate `configID`-keyed overrides into (serviceID, clientID) keys.

    Site config stores the groups-attribute override under a provider's
    `configID`, while the parsing helpers look it up by the
    (serviceID, clientID) pair found on a user's external account. This
    performs that join once. Non-SAML providers and SAML providers
    without an override get no entry.
    """
    resolved: SamlGroupsAttributeNameByProvider = {}
    saml_only = (
        candidate for candidate in providers if candidate["serviceType"] == SAML_SERVICE_TYPE
    )
    for saml_provider in saml_only:
        override = overrides_by_config_id.get(saml_provider["configID"])
        if override is not None:
            account_key = (saml_provider["serviceID"], saml_provider["clientID"])
            resolved[account_key] = override
    return resolved
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def attribute_name_for(
    overrides: SamlGroupsAttributeNameByProvider | None,
    service_id: str,
    client_id: str,
) -> str:
    """Resolve the groups attribute name for one SAML provider.

    Returns the override stored under `(service_id, client_id)`, or
    `DEFAULT_GROUPS_ATTRIBUTE_NAME` when `overrides` is `None` or has no
    entry for that pair.
    """
    lookup = {} if overrides is None else overrides
    return lookup.get((service_id, client_id), DEFAULT_GROUPS_ATTRIBUTE_NAME)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
MISSING_GROUP_NAME: str = "missingGroup"
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def extract_saml_groups(
    account_data: dict[str, Any] | None,
    attribute_name: str = DEFAULT_GROUPS_ATTRIBUTE_NAME,
) -> list[str]:
    """Pull the group-name strings out of one SAML `accountData` blob.

    `account_data` is the decoded `accountData` value of one external
    account; `attribute_name` is the SAML attribute that carries the
    group claims.

    Returns `[]` for null/empty data, data that is not a JSON object,
    a missing attribute, or an unknown shape — never raises. Duplicate
    group names are de-duplicated; first-seen ordering is preserved.
    """
    # The blob is decoded JSON, so despite the annotation it can be any
    # JSON value at runtime. Anything other than a non-empty object has
    # no groups to offer; bail out before calling `.get` on it.
    untrusted: Any = account_data
    if not isinstance(untrusted, dict) or not untrusted:
        return []
    data = cast(dict[str, Any], untrusted)
    # dict keys keep insertion order, which gives an ordered de-dup.
    return list(dict.fromkeys(_iter_saml_group_values(data, attribute_name)))
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def _iter_saml_group_values(account_data: dict[str, Any], attribute_name: str) -> Iterable[str]:
    """Yield group values from the raw-assertion shape, then the flat shape."""
    for group_name in _iter_assertion_group_values(account_data, attribute_name):
        yield group_name
    for group_name in _iter_flat_group_values(account_data, attribute_name):
        yield group_name
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def _iter_assertion_group_values(
    account_data: dict[str, Any], attribute_name: str
) -> Iterable[str]:
    """Yield group values found in raw AssertionInfo `accountData`.

    Walks `Assertions[*].AttributeStatement.Attributes[*]` and yields
    the values of every attribute whose `Name` equals `attribute_name`.
    Entries with an unexpected type are skipped.
    """
    for assertion in _dict_items(account_data.get("Assertions")):
        attribute_statement = assertion.get("AttributeStatement")
        if isinstance(attribute_statement, dict):
            typed_statement = cast(dict[str, Any], attribute_statement)
            for attribute in _dict_items(typed_statement.get("Attributes")):
                if attribute.get("Name") == attribute_name:
                    yield from _iter_attribute_values(attribute)
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def _iter_flat_group_values(account_data: dict[str, Any], attribute_name: str) -> Iterable[str]:
    """Yield group values found in flattened SAMLValues `accountData`.

    Reads `Values[attribute_name]` and yields that attribute's values;
    yields nothing when either level is missing or is not a dict.
    """
    values_by_name = account_data.get("Values")
    if isinstance(values_by_name, dict):
        candidate = cast(dict[str, Any], values_by_name).get(attribute_name)
        if isinstance(candidate, dict):
            yield from _iter_attribute_values(cast(dict[str, Any], candidate))
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def _iter_attribute_values(attribute: dict[str, Any]) -> Iterable[str]:
    """Yield each string `Value` listed under the attribute's `Values`."""
    entries = _dict_items(attribute.get("Values"))
    candidates = (entry.get("Value") for entry in entries)
    yield from (text for text in candidates if isinstance(text, str))
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def _dict_items(value: Any) -> list[dict[str, Any]]:
|
|
147
|
+
if not isinstance(value, list):
|
|
148
|
+
return []
|
|
149
|
+
items = cast(list[Any], value)
|
|
150
|
+
return [cast(dict[str, Any], item) for item in items if isinstance(item, dict)]
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def compact_saml_group_user(
    user: shared_types.User,
    providers_by_account_key: dict[tuple[str, str], shared_types.AuthProvider],
    attribute_names_by_provider: SamlGroupsAttributeNameByProvider,
) -> shared_types.SamlGroupUser | None:
    """Reduce one full user row to the fields org sync needs.

    Only SAML external accounts whose (serviceID, clientID) pair is
    present in `providers_by_account_key` contribute. Each
    (provider configID, group name) pair is recorded once, in first-seen
    order. Returns `None` when the user ends up with no memberships.
    """
    collected: list[shared_types.SamlGroupMembership] = []
    recorded_keys: set[tuple[str, str]] = set()
    saml_accounts = (
        node
        for node in user["externalAccounts"]["nodes"]
        if node["serviceType"] == SAML_SERVICE_TYPE
    )
    for saml_account in saml_accounts:
        service_id = saml_account["serviceID"]
        client_id = saml_account["clientID"]
        provider = providers_by_account_key.get((service_id, client_id))
        if provider is None:
            # Account belongs to a provider that was not discovered.
            continue
        groups_attribute = attribute_name_for(attribute_names_by_provider, service_id, client_id)
        for group_name in extract_saml_groups(saml_account.get("accountData"), groups_attribute):
            config_id = provider["configID"]
            if (config_id, group_name) not in recorded_keys:
                collected.append(
                    shared_types.SamlGroupMembership(
                        provider_config_id=config_id,
                        group_name=group_name,
                    )
                )
                recorded_keys.add((config_id, group_name))
    if collected:
        return shared_types.SamlGroupUser(
            user_id=user["id"],
            username=user["username"],
            saml_group_memberships=tuple(collected),
        )
    return None
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def compact_saml_group_users(
    users: Iterable[shared_types.User],
    providers: Iterable[shared_types.AuthProvider],
    attribute_names_by_provider: SamlGroupsAttributeNameByProvider,
) -> list[shared_types.SamlGroupUser]:
    """Reduce full user rows to the org-sync data kept for later in the run.

    Users for whom `compact_saml_group_user` returns `None` are dropped;
    the remaining users keep their input order.
    """
    account_key_index = saml_providers_by_account_key(providers)
    candidates = (
        compact_saml_group_user(user, account_key_index, attribute_names_by_provider)
        for user in users
    )
    return [candidate for candidate in candidates if candidate is not None]
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def count_users_per_saml_group(
    users: Iterable[shared_types.User],
    attribute_names_by_provider: SamlGroupsAttributeNameByProvider | None = None,
) -> dict[tuple[str, str], dict[str, int]]:
    """Count users per group for every `(serviceID, clientID)` SAML provider.

    The outer keys are the `(serviceID, clientID)` pairs found on the
    users' SAML external accounts; the inner dict maps a group name to
    the number of distinct users in it.

    `attribute_names_by_provider` is the override map produced by
    `attribute_names_by_provider_key()`. A provider without an entry, or
    every provider when the map is `None` (the default), uses
    `DEFAULT_GROUPS_ATTRIBUTE_NAME`.

    A user is counted once per (provider, group) even when several of
    their accounts on that provider report the same group. Groups that
    no user reports are absent from the output.

    Users who have an account on a provider but no groups from any of
    their accounts on it are counted under the synthetic name
    `missingGroup`. A user with groups on at least one account of the
    provider is not counted as missing.
    """
    members_by_group: dict[tuple[str, str], dict[str, set[str]]] = {}
    users_by_provider: dict[tuple[str, str], set[str]] = {}
    users_with_groups: dict[tuple[str, str], set[str]] = {}

    for user in users:
        for account in user["externalAccounts"]["nodes"]:
            if account["serviceType"] != SAML_SERVICE_TYPE:
                continue
            service_id = account["serviceID"]
            client_id = account["clientID"]
            account_key = (service_id, client_id)
            user_id = user["id"]
            users_by_provider.setdefault(account_key, set()).add(user_id)
            group_names = extract_saml_groups(
                account.get("accountData"),
                attribute_name_for(attribute_names_by_provider, service_id, client_id),
            )
            if group_names:
                users_with_groups.setdefault(account_key, set()).add(user_id)
                bucket = members_by_group.setdefault(account_key, {})
                for group_name in group_names:
                    bucket.setdefault(group_name, set()).add(user_id)

    tallies: dict[tuple[str, str], dict[str, int]] = {}
    for account_key, members in users_by_provider.items():
        group_counts = {
            group_name: len(member_ids)
            for group_name, member_ids in members_by_group.get(account_key, {}).items()
        }
        # Users seen on this provider who never reported a group.
        ungrouped = members - users_with_groups.get(account_key, set())
        if ungrouped:
            group_counts[MISSING_GROUP_NAME] = len(ungrouped)
        if group_counts:
            tallies[account_key] = group_counts
    return tallies
|