src-auth-perms-sync 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- src_auth_perms_sync/__init__.py +1 -0
- src_auth_perms_sync/__main__.py +6 -0
- src_auth_perms_sync/cli.py +646 -0
- src_auth_perms_sync/orgs/__init__.py +1 -0
- src_auth_perms_sync/orgs/command.py +7 -0
- src_auth_perms_sync/orgs/queries.py +44 -0
- src_auth_perms_sync/orgs/sync.py +1167 -0
- src_auth_perms_sync/orgs/types.py +103 -0
- src_auth_perms_sync/permissions/__init__.py +1 -0
- src_auth_perms_sync/permissions/apply.py +420 -0
- src_auth_perms_sync/permissions/command.py +918 -0
- src_auth_perms_sync/permissions/full_set.py +880 -0
- src_auth_perms_sync/permissions/mapping.py +627 -0
- src_auth_perms_sync/permissions/maps.py +291 -0
- src_auth_perms_sync/permissions/queries.py +180 -0
- src_auth_perms_sync/permissions/restore.py +913 -0
- src_auth_perms_sync/permissions/snapshot.py +1502 -0
- src_auth_perms_sync/permissions/sourcegraph.py +392 -0
- src_auth_perms_sync/permissions/types.py +116 -0
- src_auth_perms_sync/permissions/workflow.py +526 -0
- src_auth_perms_sync/shared/__init__.py +1 -0
- src_auth_perms_sync/shared/backups.py +119 -0
- src_auth_perms_sync/shared/id_codec.py +67 -0
- src_auth_perms_sync/shared/queries.py +65 -0
- src_auth_perms_sync/shared/run_context.py +34 -0
- src_auth_perms_sync/shared/saml_groups.py +267 -0
- src_auth_perms_sync/shared/site_config.py +366 -0
- src_auth_perms_sync/shared/sourcegraph.py +69 -0
- src_auth_perms_sync/shared/types.py +69 -0
- src_auth_perms_sync-0.2.1.dist-info/METADATA +256 -0
- src_auth_perms_sync-0.2.1.dist-info/RECORD +34 -0
- src_auth_perms_sync-0.2.1.dist-info/WHEEL +4 -0
- src_auth_perms_sync-0.2.1.dist-info/entry_points.txt +2 -0
- src_auth_perms_sync-0.2.1.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,627 @@
|
|
|
1
|
+
"""Permission mapping resolution: validate rules and match users/repos.
|
|
2
|
+
|
|
3
|
+
Each mapping rule has a `users:` section and a `repos:` section, each
|
|
4
|
+
containing one or more matchers (today: `authProvider`,
|
|
5
|
+
`codeHostConnection`, and `regex`). Within a matcher, the supplied
|
|
6
|
+
keys AND together against the discovered auth-provider / external-
|
|
7
|
+
service entries. Across mapping rules, `cmd_set` unions the per-repo
|
|
8
|
+
user sets at apply time — see `src/src_auth_perms_sync/permissions/types.py` for the rationale.
|
|
9
|
+
|
|
10
|
+
Adding a new matcher type:
|
|
11
|
+
|
|
12
|
+
1. Add the TypedDict in `src/src_auth_perms_sync/permissions/types.py`.
|
|
13
|
+
2. Add it as a sibling key on `UsersFilter` or `ReposFilter`.
|
|
14
|
+
3. Add a branch in `resolve_users` / `resolve_repos` below.
|
|
15
|
+
4. Add structural validation in `validate_mapping_rules`.
|
|
16
|
+
5. Add an example rule using the new matcher to `maps-example.yaml`.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import logging
|
|
22
|
+
import re
|
|
23
|
+
from collections.abc import Mapping
|
|
24
|
+
from typing import Any, cast
|
|
25
|
+
|
|
26
|
+
import json5
|
|
27
|
+
|
|
28
|
+
from ..shared import id_codec, saml_groups
|
|
29
|
+
from ..shared import types as shared_types
|
|
30
|
+
from . import types as permission_types
|
|
31
|
+
|
|
32
|
+
# Module-level logger, named after this module's import path (`__name__`).
log = logging.getLogger(__name__)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
# (matcher key, discovered AuthProvider key) pairs that `_providers_matching`
# compares for equality. Only `type` is spelled differently on the provider
# side (`serviceType`); every other key has the same name on both sides.
AUTH_PROVIDER_VALUE_MATCHES: tuple[tuple[str, str], ...] = (
    ("type", "serviceType"),
    ("serviceID", "serviceID"),
    ("clientID", "clientID"),
    ("displayName", "displayName"),
    ("configID", "configID"),
)
# External-service keys that `_services_matching` compares for equality; the
# key is spelled the same in the matcher and on the service.
CODE_HOST_VALUE_MATCHES: tuple[str, ...] = ("kind", "displayName", "url")

# Allowed keys under `authProvider:`, used by the structural validators to
# reject typos. `samlGroup` is allowed too but is not a provider field — it
# filters within the matched provider's users (see
# `_users_matching_auth_provider`).
AUTH_PROVIDER_MATCHER_FIELDS: set[str] = {
    matcher_key for matcher_key, _provider_key in AUTH_PROVIDER_VALUE_MATCHES
} | {"samlGroup"}
# Allowed keys under `codeHostConnection:`. `id` and `config` are not plain
# equality fields: `_services_matching` uses `id` as a direct lookup key and
# subset-matches `config` against the service's parsed config.
CODE_HOST_MATCHER_FIELDS: set[str] = {"id", "config", *CODE_HOST_VALUE_MATCHES}
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
# ---------------------------------------------------------------------------
|
|
64
|
+
# Validation (structural; cheap, runs before any GraphQL call)
|
|
65
|
+
# ---------------------------------------------------------------------------
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def validate_mapping_rules(rules: list[permission_types.MappingRule]) -> None:
    """Fail fast on structural problems in the mapping rules.

    Every rule is checked and all problems are collected before raising, so
    the operator gets one complete diagnostic instead of
    fix-one-find-the-next.

    For each rule, the `users:` and `repos:` sections must each be a
    non-empty mapping of matchers. A section that is present but is not a
    mapping (for example a YAML list or a bare string) is reported as an
    error rather than crashing the validator. Well-formed sections are
    handed to `_validate_users_section` / `_validate_repos_section` for the
    per-matcher checks.

    Args:
        rules: Mapping rules as loaded from the configuration file.

    Raises:
        SystemExit: If any rule has a structural problem. The message lists
            every collected error, one per bullet.
    """
    errors: list[str] = []
    for rule_index, rule in enumerate(rules, start=1):
        label = rule.get("name") or f"<unnamed rule #{rule_index}>"
        prefix = f"mapping {rule_index} ({label!r})"

        # A missing or null section is treated the same as an empty one.
        users_section = rule.get("users") or {}
        if not isinstance(users_section, dict):
            errors.append(
                f"{prefix}: `users:` section must be a mapping of matchers "
                f"(got {type(users_section).__name__})"
            )
        elif not users_section:
            errors.append(f"{prefix}: `users:` section is empty (matches no users)")
        else:
            errors.extend(
                _validate_users_section(cast(dict[str, object], users_section), prefix)
            )

        repos_section = rule.get("repos") or {}
        if not isinstance(repos_section, dict):
            errors.append(
                f"{prefix}: `repos:` section must be a mapping of matchers "
                f"(got {type(repos_section).__name__})"
            )
        elif not repos_section:
            errors.append(f"{prefix}: `repos:` section is empty (matches no repos)")
        else:
            errors.extend(
                _validate_repos_section(cast(dict[str, object], repos_section), prefix)
            )

    if errors:
        bullet = "\n - "
        raise SystemExit(
            f"FATAL: {len(errors)} mapping configuration error(s):" + bullet + bullet.join(errors)
        )
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
_KNOWN_USER_MATCHERS: set[str] = {"authProvider"}
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def _validate_users_section(section: dict[str, object], prefix: str) -> list[str]:
|
|
109
|
+
"""Reject unknown matcher keys and validate each matcher's shape."""
|
|
110
|
+
errors: list[str] = []
|
|
111
|
+
for key in section:
|
|
112
|
+
if key not in _KNOWN_USER_MATCHERS:
|
|
113
|
+
errors.append(f"{prefix}: unknown users matcher {key!r}")
|
|
114
|
+
auth_provider = cast(dict[str, object] | None, section.get("authProvider"))
|
|
115
|
+
if auth_provider is not None:
|
|
116
|
+
unknown = set(auth_provider) - AUTH_PROVIDER_MATCHER_FIELDS
|
|
117
|
+
for field_name in sorted(unknown):
|
|
118
|
+
errors.append(f"{prefix}: unknown authProvider field {field_name!r}")
|
|
119
|
+
if not auth_provider:
|
|
120
|
+
errors.append(
|
|
121
|
+
f"{prefix}: authProvider is empty (would match every provider on the instance)"
|
|
122
|
+
)
|
|
123
|
+
if "samlGroup" in auth_provider:
|
|
124
|
+
errors.extend(_validate_saml_group(auth_provider, prefix))
|
|
125
|
+
return errors
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def _validate_saml_group(auth_provider: dict[str, object], prefix: str) -> list[str]:
|
|
129
|
+
"""`authProvider.samlGroup`, if present, must be a non-empty string and
|
|
130
|
+
incompatible with a non-SAML `type:` (the rule could never match).
|
|
131
|
+
"""
|
|
132
|
+
errors: list[str] = []
|
|
133
|
+
value = auth_provider["samlGroup"]
|
|
134
|
+
if not isinstance(value, str):
|
|
135
|
+
errors.append(
|
|
136
|
+
f"{prefix}: authProvider.samlGroup must be a single group-name "
|
|
137
|
+
f"string (got {type(value).__name__} {value!r}); to OR multiple "
|
|
138
|
+
f"groups, write multiple rules"
|
|
139
|
+
)
|
|
140
|
+
elif not value:
|
|
141
|
+
errors.append(f"{prefix}: authProvider.samlGroup is an empty string")
|
|
142
|
+
declared_type = auth_provider.get("type")
|
|
143
|
+
if (
|
|
144
|
+
isinstance(declared_type, str)
|
|
145
|
+
and declared_type
|
|
146
|
+
and declared_type != saml_groups.SAML_SERVICE_TYPE
|
|
147
|
+
):
|
|
148
|
+
errors.append(
|
|
149
|
+
f"{prefix}: authProvider.samlGroup is set but authProvider.type "
|
|
150
|
+
f"is {declared_type!r}; only SAML providers carry group claims"
|
|
151
|
+
)
|
|
152
|
+
return errors
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def _validate_repos_section(section: dict[str, object], prefix: str) -> list[str]:
|
|
156
|
+
"""Reject unknown matcher keys and validate `codeHostConnection:` shape."""
|
|
157
|
+
errors: list[str] = []
|
|
158
|
+
for key in section:
|
|
159
|
+
if key not in {"codeHostConnection", "regex"}:
|
|
160
|
+
errors.append(f"{prefix}: unknown repos matcher {key!r}")
|
|
161
|
+
code_host_section = cast(dict[str, object] | None, section.get("codeHostConnection"))
|
|
162
|
+
if code_host_section is not None:
|
|
163
|
+
unknown = set(code_host_section) - CODE_HOST_MATCHER_FIELDS
|
|
164
|
+
for field_name in sorted(unknown):
|
|
165
|
+
errors.append(f"{prefix}: unknown codeHostConnection field {field_name!r}")
|
|
166
|
+
if not (set(code_host_section) & CODE_HOST_MATCHER_FIELDS):
|
|
167
|
+
errors.append(
|
|
168
|
+
f"{prefix}: codeHostConnection is empty (would match every "
|
|
169
|
+
f"external service on the instance); supply at least one of "
|
|
170
|
+
f"{sorted(CODE_HOST_MATCHER_FIELDS)}"
|
|
171
|
+
)
|
|
172
|
+
if "id" in code_host_section:
|
|
173
|
+
external_service_id = code_host_section["id"]
|
|
174
|
+
if external_service_id is None or external_service_id == "":
|
|
175
|
+
errors.append(
|
|
176
|
+
f"{prefix}: codeHostConnection.id, if supplied, must be "
|
|
177
|
+
f"a non-empty integer (e.g. `id: 5`)"
|
|
178
|
+
)
|
|
179
|
+
elif not isinstance(external_service_id, int) or isinstance(external_service_id, bool):
|
|
180
|
+
errors.append(
|
|
181
|
+
f"{prefix}: codeHostConnection.id must be an integer "
|
|
182
|
+
f"(got {type(external_service_id).__name__} {external_service_id!r}); "
|
|
183
|
+
f"the YAML config holds the decoded DB primary key, not the "
|
|
184
|
+
f"opaque base64 GraphQL Node ID"
|
|
185
|
+
)
|
|
186
|
+
if "config" in code_host_section and not isinstance(code_host_section["config"], dict):
|
|
187
|
+
errors.append(
|
|
188
|
+
f"{prefix}: codeHostConnection.config must be a mapping of "
|
|
189
|
+
f"key/value pairs to deep-subset-match against the service's "
|
|
190
|
+
f"parsed config (got {type(code_host_section['config']).__name__})"
|
|
191
|
+
)
|
|
192
|
+
regex = section.get("regex")
|
|
193
|
+
if regex is not None:
|
|
194
|
+
if not isinstance(regex, str):
|
|
195
|
+
errors.append(f"{prefix}: repos.regex must be a string (got {type(regex).__name__})")
|
|
196
|
+
elif not regex:
|
|
197
|
+
errors.append(f"{prefix}: repos.regex is an empty string")
|
|
198
|
+
else:
|
|
199
|
+
try:
|
|
200
|
+
re.compile(regex)
|
|
201
|
+
except re.error as exception:
|
|
202
|
+
errors.append(f"{prefix}: repos.regex is not a valid Python regex: {exception}")
|
|
203
|
+
return errors
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
# ---------------------------------------------------------------------------
|
|
207
|
+
# Users resolution
|
|
208
|
+
# ---------------------------------------------------------------------------
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def resolve_users(
    section: dict[str, object],
    all_users: list[shared_types.User],
    all_providers: list[shared_types.AuthProvider],
    saml_groups_attribute_names: saml_groups.SamlGroupsAttributeNameByProvider | None = None,
) -> list[shared_types.User]:
    """Return users matching ALL matchers under `users:` (intersection).

    The result preserves the order in which users first appear in
    `all_users`, so repeated runs over the same input produce the same list.

    Args:
        section: The rule's `users:` mapping of matcher name to matcher body.
        all_users: Every user to consider.
        all_providers: Every discovered auth provider.
        saml_groups_attribute_names: Optional per-provider override that is
            passed through to the `authProvider` matcher; only consulted
            when the matcher carries a `samlGroup` sub-field.

    Returns:
        The matching users. An empty section returns an empty list —
        `validate_mapping_rules` rejects that at config-load time, so this
        only happens for programmatic callers.

    Raises:
        ValueError: If `section` contains an unknown matcher key.
    """
    if not section:
        return []

    # If an id occurs more than once in `all_users`, the last entry wins.
    users_by_id: dict[str, shared_types.User] = {user["id"]: user for user in all_users}
    matched_ids: set[str] | None = None
    for key, matcher in section.items():
        if key != "authProvider":
            # validate_mapping_rules catches this earlier with a clearer
            # message; this only fires for programmatic callers.
            raise ValueError(f"unknown users matcher {key!r}")
        current_ids = {
            user["id"]
            for user in _users_matching_auth_provider(
                cast(permission_types.AuthProviderMatcher, matcher),
                all_users,
                all_providers,
                saml_groups_attribute_names,
            )
        }
        matched_ids = current_ids if matched_ids is None else matched_ids & current_ids
        if not matched_ids:
            return []
    # The section is non-empty, so the loop body ran at least once.
    assert matched_ids is not None
    # Walk the insertion-ordered dict rather than the id set: iterating a
    # set of strings has no stable order across interpreter runs.
    return [user for user_id, user in users_by_id.items() if user_id in matched_ids]
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
def user_matches_users_section(
    section: dict[str, object],
    user: shared_types.User,
    all_providers: list[shared_types.AuthProvider],
    saml_groups_attribute_names: saml_groups.SamlGroupsAttributeNameByProvider | None = None,
) -> bool:
    """Tell whether a single user satisfies every matcher under `users:`.

    An empty section matches nobody. Matchers are checked in section order
    and the first one the user fails ends the check with `False`.

    Raises:
        ValueError: If an unknown matcher key is reached.
    """
    if not section:
        return False

    for matcher_name, matcher_body in section.items():
        if matcher_name != "authProvider":
            # Config loaded from YAML is rejected earlier, with a clearer
            # message, by validate_mapping_rules; only programmatic callers
            # can get here.
            raise ValueError(f"unknown users matcher {matcher_name!r}")
        is_match = _user_matches_auth_provider(
            cast(permission_types.AuthProviderMatcher, matcher_body),
            user,
            all_providers,
            saml_groups_attribute_names,
        )
        if not is_match:
            return False
    return True
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
def _users_matching_auth_provider(
    matcher: permission_types.AuthProviderMatcher,
    all_users: list[shared_types.User],
    all_providers: list[shared_types.AuthProvider],
    saml_groups_attribute_names: saml_groups.SamlGroupsAttributeNameByProvider | None,
) -> list[shared_types.User]:
    """Select the users picked out by an `authProvider:` matcher.

    Providers are first filtered by the matcher's provider fields. Without
    `samlGroup`, a user is selected when they have an account in any of the
    remaining providers. With `samlGroup`, only SAML providers are kept and
    the user must additionally carry that group in their account for one of
    those providers.

    Returns each selected user once, or an empty list (after logging a
    warning) when no provider matches.
    """
    wanted_group = matcher.get("samlGroup")
    candidate_providers = _providers_matching(all_providers, matcher)
    if wanted_group:
        # Group membership is only looked up on SAML providers.
        candidate_providers = [
            candidate
            for candidate in candidate_providers
            if candidate["serviceType"] == saml_groups.SAML_SERVICE_TYPE
        ]

    if not candidate_providers:
        log.warning(
            " authProvider matcher matched zero providers (%s).",
            _format_matcher(cast(dict[str, object], matcher)),
        )
        return []

    # Report every matched provider before doing any per-user work.
    for candidate in candidate_providers:
        log.info(
            " authProvider → %s (type=%s serviceID=%s clientID=%s)",
            candidate["displayName"],
            candidate["serviceType"],
            candidate["serviceID"],
            candidate["clientID"],
        )

    # Keyed by user id so a user reachable through several providers is
    # returned once.
    selected: dict[str, shared_types.User] = {}
    for candidate in candidate_providers:
        if wanted_group:
            attribute_name = saml_groups.attribute_name_for(
                saml_groups_attribute_names,
                candidate["serviceID"],
                candidate["clientID"],
            )
            hits = [
                person
                for person in all_users
                if _user_has_saml_group_in_provider(
                    person, candidate, wanted_group, attribute_name
                )
            ]
        else:
            hits = [person for person in all_users if _user_has_account_in(person, candidate)]
        for person in hits:
            selected[person["id"]] = person

    if wanted_group:
        log.info(
            " samlGroup → %d user(s) in group %r",
            len(selected),
            wanted_group,
        )
    return list(selected.values())
|
|
341
|
+
|
|
342
|
+
|
|
343
|
+
def _user_matches_auth_provider(
    matcher: permission_types.AuthProviderMatcher,
    user: shared_types.User,
    all_providers: list[shared_types.AuthProvider],
    saml_groups_attribute_names: saml_groups.SamlGroupsAttributeNameByProvider | None,
) -> bool:
    """Tell whether one user satisfies an `authProvider:` matcher.

    Without `samlGroup`, the user needs an account in any provider selected
    by the matcher. With `samlGroup`, only SAML providers count and the
    user's account there must carry the named group.
    """
    wanted_group = matcher.get("samlGroup")
    candidate_providers = _providers_matching(all_providers, matcher)

    if not wanted_group:
        return any(_user_has_account_in(user, candidate) for candidate in candidate_providers)

    saml_providers = [
        candidate
        for candidate in candidate_providers
        if candidate["serviceType"] == saml_groups.SAML_SERVICE_TYPE
    ]
    for candidate in saml_providers:
        attribute_name = saml_groups.attribute_name_for(
            saml_groups_attribute_names,
            candidate["serviceID"],
            candidate["clientID"],
        )
        if _user_has_saml_group_in_provider(user, candidate, wanted_group, attribute_name):
            return True
    return False
|
|
373
|
+
|
|
374
|
+
|
|
375
|
+
def _providers_matching(
    providers: list[shared_types.AuthProvider],
    matcher: permission_types.AuthProviderMatcher,
) -> list[shared_types.AuthProvider]:
    """Return the providers that agree with every field the matcher supplies.

    Fields are ANDed together and a field absent from the matcher places no
    constraint. `AUTH_PROVIDER_VALUE_MATCHES` names, for each matcher key,
    the provider key it is compared with.
    """
    matcher_values = cast(Mapping[str, object], matcher)
    # Resolve once which provider keys are constrained, and to what value.
    constraints = [
        (provider_key, matcher_values[matcher_key])
        for matcher_key, provider_key in AUTH_PROVIDER_VALUE_MATCHES
        if matcher_key in matcher_values
    ]

    def satisfies(provider: shared_types.AuthProvider) -> bool:
        provider_values = cast(Mapping[str, object], provider)
        return all(
            expected == provider_values[provider_key] for provider_key, expected in constraints
        )

    return [provider for provider in providers if satisfies(provider)]
|
|
395
|
+
|
|
396
|
+
|
|
397
|
+
def _user_has_account_in(user: shared_types.User, provider: shared_types.AuthProvider) -> bool:
|
|
398
|
+
"""Return whether `user` has an account matching `provider`."""
|
|
399
|
+
if provider["serviceType"] == "builtin":
|
|
400
|
+
return bool(user.get("builtinAuth"))
|
|
401
|
+
for account in user["externalAccounts"]["nodes"]:
|
|
402
|
+
if (
|
|
403
|
+
account["serviceType"] == provider["serviceType"]
|
|
404
|
+
and account["serviceID"] == provider["serviceID"]
|
|
405
|
+
and account["clientID"] == provider["clientID"]
|
|
406
|
+
):
|
|
407
|
+
return True
|
|
408
|
+
return False
|
|
409
|
+
|
|
410
|
+
|
|
411
|
+
def _user_has_saml_group_in_provider(
|
|
412
|
+
user: shared_types.User,
|
|
413
|
+
provider: shared_types.AuthProvider,
|
|
414
|
+
saml_group: str,
|
|
415
|
+
attribute_name: str,
|
|
416
|
+
) -> bool:
|
|
417
|
+
"""Return whether `user` has `saml_group` in one SAML provider account."""
|
|
418
|
+
for account in user["externalAccounts"]["nodes"]:
|
|
419
|
+
if (
|
|
420
|
+
account["serviceType"] == saml_groups.SAML_SERVICE_TYPE
|
|
421
|
+
and account["serviceID"] == provider["serviceID"]
|
|
422
|
+
and account["clientID"] == provider["clientID"]
|
|
423
|
+
and saml_group
|
|
424
|
+
in saml_groups.extract_saml_groups(account.get("accountData"), attribute_name)
|
|
425
|
+
):
|
|
426
|
+
return True
|
|
427
|
+
return False
|
|
428
|
+
|
|
429
|
+
|
|
430
|
+
# ---------------------------------------------------------------------------
|
|
431
|
+
# Repos resolution
|
|
432
|
+
# ---------------------------------------------------------------------------
|
|
433
|
+
|
|
434
|
+
|
|
435
|
+
def resolve_repos(
    section: dict[str, object],
    services_by_id: dict[int, permission_types.ExternalService],
    repos_by_external_service_id: dict[int, list[permission_types.Repository]],
    all_repos_by_id: dict[str, permission_types.Repository],
) -> list[permission_types.Repository]:
    """Return repos matching ALL matchers under `repos:` (intersection).

    Matchers run in a fixed order, `codeHostConnection` before `regex`, so
    the regex only has to scan the repos that survived the code-host filter.
    The result order is deterministic: repos appear in the order the
    matchers produced them.

    Args:
        section: The rule's `repos:` mapping of matcher name to matcher body.
        services_by_id: External services keyed by integer id.
        repos_by_external_service_id: Repos grouped by external-service id.
        all_repos_by_id: Every repo, keyed by repo id; the candidate set for
            a `regex` matcher used on its own.

    Returns:
        The matching repos. An empty section returns an empty list;
        `validate_mapping_rules` rejects that at config-load time.

    Raises:
        ValueError: If `section` contains an unknown matcher key.
    """
    if not section:
        return []

    known_matchers = ("codeHostConnection", "regex")
    for key in section:
        if key not in known_matchers:
            # validate_mapping_rules catches this earlier with a clearer
            # message; this only fires for programmatic callers.
            raise ValueError(f"unknown repos matcher {key!r}")

    matched_ids: set[str] | None = None
    repo_index: dict[str, permission_types.Repository] = {}
    for key in known_matchers:
        if key not in section:
            continue
        matcher = section[key]
        if key == "codeHostConnection":
            repos = _repos_matching_code_host_connection(
                cast(permission_types.CodeHostConnectionMatcher, matcher),
                services_by_id,
                repos_by_external_service_id,
            )
        else:
            # Narrow the regex scan to what earlier matchers kept, if any ran.
            candidate_repos = (
                [repo for repo_id, repo in repo_index.items() if repo_id in matched_ids]
                if matched_ids is not None
                else list(all_repos_by_id.values())
            )
            repos = _repos_matching_regex(cast(str, matcher), candidate_repos)
        current_ids = {repo["id"] for repo in repos}
        for repo in repos:
            repo_index[repo["id"]] = repo
        matched_ids = current_ids if matched_ids is None else matched_ids & current_ids
        if not matched_ids:
            return []
    # The section is non-empty and holds only known keys, so a matcher ran.
    assert matched_ids is not None
    # Walk the insertion-ordered index rather than the id set: iterating a
    # set of strings has no stable order across interpreter runs.
    return [repo for repo_id, repo in repo_index.items() if repo_id in matched_ids]
|
|
479
|
+
|
|
480
|
+
|
|
481
|
+
def _repos_matching_code_host_connection(
    matcher: permission_types.CodeHostConnectionMatcher,
    services_by_id: dict[int, permission_types.ExternalService],
    repos_by_external_service_id: dict[int, list[permission_types.Repository]],
) -> list[permission_types.Repository]:
    """Return the repos that belong to any external service the matcher selects.

    Services are selected by `_services_matching`. Each selected service is
    logged and its repos are looked up in `repos_by_external_service_id`. A
    repo reachable through several services is returned once.

    Returns an empty list, after logging a warning, when no service matches.
    """
    matching_services = _services_matching(services_by_id, matcher)
    if not matching_services:
        log.warning(
            " codeHostConnection matcher matched zero services (%s).",
            _format_matcher(cast(dict[str, object], matcher)),
        )
        return []

    matched_repos: dict[str, permission_types.Repository] = {}
    for service in matching_services:
        # Decode once; the value serves both the log line and the lookup.
        external_service_id = id_codec.decode_external_service_id(service["id"])
        log.info(
            " codeHostConnection → %s (id=%d kind=%s)",
            service["displayName"],
            external_service_id,
            service["kind"],
        )
        for repo in repos_by_external_service_id.get(external_service_id, []):
            matched_repos[repo["id"]] = repo
    return list(matched_repos.values())
|
|
505
|
+
|
|
506
|
+
|
|
507
|
+
def _repos_matching_regex(
    pattern: str, repos: list[permission_types.Repository]
) -> list[permission_types.Repository]:
    """Filter `repos` down to those whose name is found by `pattern`.

    The pattern is searched (not anchored) with Python's `re`, against both
    the repo name itself and the name prefixed with `https://`. The second
    form keeps URL-shaped operator patterns useful against names such as
    `github.com/example/repo`, which carry no scheme.
    """
    regex = re.compile(pattern)
    hits: list[permission_types.Repository] = []
    for repo in repos:
        if regex.search(repo["name"]):
            hits.append(repo)
        elif regex.search(f"https://{repo['name']}"):
            hits.append(repo)
    log.info(" regex → %d repo(s) matched %r", len(hits), pattern)
    return hits
|
|
524
|
+
|
|
525
|
+
|
|
526
|
+
def _services_matching(
    services_by_id: dict[int, permission_types.ExternalService],
    matcher: permission_types.CodeHostConnectionMatcher,
) -> list[permission_types.ExternalService]:
    """Return the external services that satisfy every field of the matcher.

    When the matcher carries `id`, only the service stored under that id is
    considered (none if it is unknown) and the remaining fields act as a
    cross-check on it. Otherwise every service is a candidate. The fields in
    `CODE_HOST_VALUE_MATCHES` are compared for equality, and `config`, when
    supplied, is subset-matched against the service's parsed config.
    """
    if "id" in matcher:
        by_id = services_by_id.get(matcher["id"])
        candidates = [] if by_id is None else [by_id]
    else:
        candidates = list(services_by_id.values())

    matcher_values = cast(Mapping[str, object], matcher)

    def accepts(service: permission_types.ExternalService) -> bool:
        service_values = cast(Mapping[str, object], service)
        for field_name in CODE_HOST_VALUE_MATCHES:
            if field_name not in matcher_values:
                continue
            if matcher_values[field_name] != service_values[field_name]:
                return False
        if "config" not in matcher:
            return True
        return _config_subset_matches(matcher["config"], _parsed_service_config(service))

    return [service for service in candidates if accepts(service)]
|
|
560
|
+
|
|
561
|
+
|
|
562
|
+
def _parsed_service_config(service: permission_types.ExternalService) -> dict[str, Any]:
|
|
563
|
+
"""Best-effort parse of `ExternalService.config` (JSONC string).
|
|
564
|
+
|
|
565
|
+
Returns an empty dict if the config is missing or unparseable —
|
|
566
|
+
callers treat that as "no keys to match against", so a `config:`
|
|
567
|
+
matcher against such a service simply fails to match instead of
|
|
568
|
+
raising. Sourcegraph's resolver returns a JSON object string, so
|
|
569
|
+
parse failures here are anomalies worth not crashing on.
|
|
570
|
+
"""
|
|
571
|
+
raw_config = service.get("config")
|
|
572
|
+
if not raw_config:
|
|
573
|
+
return {}
|
|
574
|
+
try:
|
|
575
|
+
parsed = cast(Any, json5.loads(raw_config))
|
|
576
|
+
except ValueError:
|
|
577
|
+
return {}
|
|
578
|
+
if not isinstance(parsed, dict):
|
|
579
|
+
return {}
|
|
580
|
+
return cast(dict[str, Any], parsed)
|
|
581
|
+
|
|
582
|
+
|
|
583
|
+
def _config_subset_matches(matcher_config: dict[str, Any], service_config: dict[str, Any]) -> bool:
|
|
584
|
+
"""True iff every key in `matcher_config` is present in `service_config`
|
|
585
|
+
with a matching value. Nested dicts are matched recursively
|
|
586
|
+
(subset semantics); lists and scalars are matched by equality.
|
|
587
|
+
|
|
588
|
+
Sourcegraph's `REDACTED` sentinel is left as-is on the service side:
|
|
589
|
+
a matcher that names a redacted key (e.g. `token`) compares against
|
|
590
|
+
the literal `"REDACTED"` string and almost certainly fails to
|
|
591
|
+
match — exactly the semantics we want, since the operator can't
|
|
592
|
+
have known the real secret value.
|
|
593
|
+
"""
|
|
594
|
+
for key, expected in matcher_config.items():
|
|
595
|
+
if key not in service_config:
|
|
596
|
+
return False
|
|
597
|
+
actual = service_config[key]
|
|
598
|
+
if isinstance(expected, dict) and isinstance(actual, dict):
|
|
599
|
+
if not _config_subset_matches(
|
|
600
|
+
cast(dict[str, Any], expected), cast(dict[str, Any], actual)
|
|
601
|
+
):
|
|
602
|
+
return False
|
|
603
|
+
continue
|
|
604
|
+
if expected != actual:
|
|
605
|
+
return False
|
|
606
|
+
return True
|
|
607
|
+
|
|
608
|
+
|
|
609
|
+
def referenced_external_service_ids(rules: list[permission_types.MappingRule]) -> set[int]:
    """Gather the `codeHostConnection.id` values named by the mapping rules.

    Rules without a `repos:` section, without a `codeHostConnection:`
    matcher, or whose matcher carries no `id` contribute nothing. The ids
    are returned exactly as written in the rules.
    """
    connections = (
        (rule.get("repos") or {}).get("codeHostConnection") for rule in rules
    )
    return {
        connection["id"]
        for connection in connections
        if connection and "id" in connection
    }
|
|
623
|
+
|
|
624
|
+
|
|
625
|
+
def _format_matcher(matcher: dict[str, object]) -> str:
|
|
626
|
+
"""Render a matcher dict as `key1=value1 key2=value2` for log output."""
|
|
627
|
+
return " ".join(f"{key}={value!r}" for key, value in matcher.items())
|