src-auth-perms-sync 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. src_auth_perms_sync/__init__.py +1 -0
  2. src_auth_perms_sync/__main__.py +6 -0
  3. src_auth_perms_sync/cli.py +646 -0
  4. src_auth_perms_sync/orgs/__init__.py +1 -0
  5. src_auth_perms_sync/orgs/command.py +7 -0
  6. src_auth_perms_sync/orgs/queries.py +44 -0
  7. src_auth_perms_sync/orgs/sync.py +1167 -0
  8. src_auth_perms_sync/orgs/types.py +103 -0
  9. src_auth_perms_sync/permissions/__init__.py +1 -0
  10. src_auth_perms_sync/permissions/apply.py +420 -0
  11. src_auth_perms_sync/permissions/command.py +918 -0
  12. src_auth_perms_sync/permissions/full_set.py +880 -0
  13. src_auth_perms_sync/permissions/mapping.py +627 -0
  14. src_auth_perms_sync/permissions/maps.py +291 -0
  15. src_auth_perms_sync/permissions/queries.py +180 -0
  16. src_auth_perms_sync/permissions/restore.py +913 -0
  17. src_auth_perms_sync/permissions/snapshot.py +1502 -0
  18. src_auth_perms_sync/permissions/sourcegraph.py +392 -0
  19. src_auth_perms_sync/permissions/types.py +116 -0
  20. src_auth_perms_sync/permissions/workflow.py +526 -0
  21. src_auth_perms_sync/shared/__init__.py +1 -0
  22. src_auth_perms_sync/shared/backups.py +119 -0
  23. src_auth_perms_sync/shared/id_codec.py +67 -0
  24. src_auth_perms_sync/shared/queries.py +65 -0
  25. src_auth_perms_sync/shared/run_context.py +34 -0
  26. src_auth_perms_sync/shared/saml_groups.py +267 -0
  27. src_auth_perms_sync/shared/site_config.py +366 -0
  28. src_auth_perms_sync/shared/sourcegraph.py +69 -0
  29. src_auth_perms_sync/shared/types.py +69 -0
  30. src_auth_perms_sync-0.2.1.dist-info/METADATA +256 -0
  31. src_auth_perms_sync-0.2.1.dist-info/RECORD +34 -0
  32. src_auth_perms_sync-0.2.1.dist-info/WHEEL +4 -0
  33. src_auth_perms_sync-0.2.1.dist-info/entry_points.txt +2 -0
  34. src_auth_perms_sync-0.2.1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,627 @@
1
+ """Permission mapping resolution: validate rules and match users/repos.
2
+
3
+ Each mapping rule has a `users:` section and a `repos:` section, each
4
+ containing one or more matchers (today: `authProvider`,
5
+ `codeHostConnection`, and `regex`). Within a matcher, the supplied
6
+ keys AND together against the discovered auth-provider / external-
7
+ service entries. Across mapping rules, `cmd_set` unions the per-repo
8
+ user sets at apply time — see `src/src_auth_perms_sync/permissions/types.py` for the rationale.
9
+
10
+ Adding a new matcher type:
11
+
12
+ 1. Add the TypedDict in `src/src_auth_perms_sync/permissions/types.py`.
13
+ 2. Add it as a sibling key on `UsersFilter` or `ReposFilter`.
14
+ 3. Add a branch in `resolve_users` / `resolve_repos` below.
15
+ 4. Add structural validation in `validate_mapping_rules`.
16
+ 5. Add an example rule using the new matcher to `maps-example.yaml`.
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import logging
22
+ import re
23
+ from collections.abc import Mapping
24
+ from typing import Any, cast
25
+
26
+ import json5
27
+
28
+ from ..shared import id_codec, saml_groups
29
+ from ..shared import types as shared_types
30
+ from . import types as permission_types
31
+
32
+ log = logging.getLogger(__name__)
33
+
34
+
35
# Matcher-field tables shared by the structural validators (which use the
# `*_MATCHER_FIELDS` sets to reject typos) and by the resolvers (which use
# the `*_VALUE_MATCHES` tuples to compare values).

# (matcher key, discovered AuthProvider key) pairs that `_providers_matching`
# ANDs together. Only `type` is spelled differently on the two sides:
# matcher `type` ↔ AuthProvider `serviceType`.
AUTH_PROVIDER_VALUE_MATCHES: tuple[tuple[str, str], ...] = (
    ("type", "serviceType"),
    ("serviceID", "serviceID"),
    ("clientID", "clientID"),
    ("displayName", "displayName"),
    ("configID", "configID"),
)
# Every key accepted under `authProvider:`. `samlGroup` is allowed there too
# but is not a provider field — it filters within the matched provider's
# users (see `_users_matching_auth_provider`).
AUTH_PROVIDER_MATCHER_FIELDS: set[str] = {
    *(matcher_key for matcher_key, _ in AUTH_PROVIDER_VALUE_MATCHES),
    "samlGroup",
}

# `codeHostConnection:` keys compared by plain equality against the
# same-named key of the discovered external service.
CODE_HOST_VALUE_MATCHES: tuple[str, ...] = ("kind", "displayName", "url")
# Every key accepted under `codeHostConnection:`. `id` and `config` are
# handled separately inside `_services_matching` (direct lookup and
# subset match respectively) rather than by plain equality.
CODE_HOST_MATCHER_FIELDS: set[str] = {"id", *CODE_HOST_VALUE_MATCHES, "config"}
61
+
62
+
63
+ # ---------------------------------------------------------------------------
64
+ # Validation (structural; cheap, runs before any GraphQL call)
65
+ # ---------------------------------------------------------------------------
66
+
67
+
68
def validate_mapping_rules(rules: list[permission_types.MappingRule]) -> None:
    """Fail fast on structural problems in the YAML before doing any work.

    Catches operator typos that would otherwise produce confusing partial
    results (or silent matches against the wrong set of users/repos)
    only after a full instance scan. All errors are collected and
    reported together so the operator gets one clear diagnostic instead
    of fix-one-find-the-next.

    A rule that is not a mapping, or whose `users:` / `repos:` value is
    not a mapping, is reported as a configuration error instead of
    crashing the validator with an `AttributeError`.

    Semantic warnings (e.g. a matcher that resolves to zero providers)
    are logged at apply time by the resolver, not raised here — they're
    not always bugs.

    Args:
        rules: Mapping rules as loaded from the configuration file.

    Raises:
        SystemExit: If at least one structural error was found; the
            message lists every error, one bullet per error.
    """
    errors: list[str] = []
    for rule_index, rule in enumerate(rules, start=1):
        if not isinstance(rule, dict):
            errors.append(
                f"mapping {rule_index}: rule must be a mapping with `users:` "
                f"and `repos:` sections (got {type(rule).__name__})"
            )
            continue

        label = rule.get("name") or f"<unnamed rule #{rule_index}>"
        prefix = f"mapping {rule_index} ({label!r})"

        # A missing or null section falls back to an empty mapping so it
        # is reported as "empty" rather than as a type error.
        users_section = rule.get("users") or {}
        repos_section = rule.get("repos") or {}
        users_is_mapping = isinstance(users_section, dict)
        repos_is_mapping = isinstance(repos_section, dict)

        if not users_is_mapping:
            errors.append(
                f"{prefix}: `users:` section must be a mapping of matchers "
                f"(got {type(users_section).__name__})"
            )
        elif not users_section:
            errors.append(f"{prefix}: `users:` section is empty (matches no users)")
        if not repos_is_mapping:
            errors.append(
                f"{prefix}: `repos:` section must be a mapping of matchers "
                f"(got {type(repos_section).__name__})"
            )
        elif not repos_section:
            errors.append(f"{prefix}: `repos:` section is empty (matches no repos)")

        # Only descend into sections that have something to inspect; an
        # empty mapping has no matcher keys to validate.
        if users_is_mapping and users_section:
            errors.extend(
                _validate_users_section(cast(dict[str, object], users_section), prefix)
            )
        if repos_is_mapping and repos_section:
            errors.extend(
                _validate_repos_section(cast(dict[str, object], repos_section), prefix)
            )

    if errors:
        bullet = "\n - "
        raise SystemExit(
            f"FATAL: {len(errors)} mapping configuration error(s):" + bullet + bullet.join(errors)
        )
103
+
104
+
105
+ _KNOWN_USER_MATCHERS: set[str] = {"authProvider"}
106
+
107
+
108
+ def _validate_users_section(section: dict[str, object], prefix: str) -> list[str]:
109
+ """Reject unknown matcher keys and validate each matcher's shape."""
110
+ errors: list[str] = []
111
+ for key in section:
112
+ if key not in _KNOWN_USER_MATCHERS:
113
+ errors.append(f"{prefix}: unknown users matcher {key!r}")
114
+ auth_provider = cast(dict[str, object] | None, section.get("authProvider"))
115
+ if auth_provider is not None:
116
+ unknown = set(auth_provider) - AUTH_PROVIDER_MATCHER_FIELDS
117
+ for field_name in sorted(unknown):
118
+ errors.append(f"{prefix}: unknown authProvider field {field_name!r}")
119
+ if not auth_provider:
120
+ errors.append(
121
+ f"{prefix}: authProvider is empty (would match every provider on the instance)"
122
+ )
123
+ if "samlGroup" in auth_provider:
124
+ errors.extend(_validate_saml_group(auth_provider, prefix))
125
+ return errors
126
+
127
+
128
+ def _validate_saml_group(auth_provider: dict[str, object], prefix: str) -> list[str]:
129
+ """`authProvider.samlGroup`, if present, must be a non-empty string and
130
+ incompatible with a non-SAML `type:` (the rule could never match).
131
+ """
132
+ errors: list[str] = []
133
+ value = auth_provider["samlGroup"]
134
+ if not isinstance(value, str):
135
+ errors.append(
136
+ f"{prefix}: authProvider.samlGroup must be a single group-name "
137
+ f"string (got {type(value).__name__} {value!r}); to OR multiple "
138
+ f"groups, write multiple rules"
139
+ )
140
+ elif not value:
141
+ errors.append(f"{prefix}: authProvider.samlGroup is an empty string")
142
+ declared_type = auth_provider.get("type")
143
+ if (
144
+ isinstance(declared_type, str)
145
+ and declared_type
146
+ and declared_type != saml_groups.SAML_SERVICE_TYPE
147
+ ):
148
+ errors.append(
149
+ f"{prefix}: authProvider.samlGroup is set but authProvider.type "
150
+ f"is {declared_type!r}; only SAML providers carry group claims"
151
+ )
152
+ return errors
153
+
154
+
155
+ def _validate_repos_section(section: dict[str, object], prefix: str) -> list[str]:
156
+ """Reject unknown matcher keys and validate `codeHostConnection:` shape."""
157
+ errors: list[str] = []
158
+ for key in section:
159
+ if key not in {"codeHostConnection", "regex"}:
160
+ errors.append(f"{prefix}: unknown repos matcher {key!r}")
161
+ code_host_section = cast(dict[str, object] | None, section.get("codeHostConnection"))
162
+ if code_host_section is not None:
163
+ unknown = set(code_host_section) - CODE_HOST_MATCHER_FIELDS
164
+ for field_name in sorted(unknown):
165
+ errors.append(f"{prefix}: unknown codeHostConnection field {field_name!r}")
166
+ if not (set(code_host_section) & CODE_HOST_MATCHER_FIELDS):
167
+ errors.append(
168
+ f"{prefix}: codeHostConnection is empty (would match every "
169
+ f"external service on the instance); supply at least one of "
170
+ f"{sorted(CODE_HOST_MATCHER_FIELDS)}"
171
+ )
172
+ if "id" in code_host_section:
173
+ external_service_id = code_host_section["id"]
174
+ if external_service_id is None or external_service_id == "":
175
+ errors.append(
176
+ f"{prefix}: codeHostConnection.id, if supplied, must be "
177
+ f"a non-empty integer (e.g. `id: 5`)"
178
+ )
179
+ elif not isinstance(external_service_id, int) or isinstance(external_service_id, bool):
180
+ errors.append(
181
+ f"{prefix}: codeHostConnection.id must be an integer "
182
+ f"(got {type(external_service_id).__name__} {external_service_id!r}); "
183
+ f"the YAML config holds the decoded DB primary key, not the "
184
+ f"opaque base64 GraphQL Node ID"
185
+ )
186
+ if "config" in code_host_section and not isinstance(code_host_section["config"], dict):
187
+ errors.append(
188
+ f"{prefix}: codeHostConnection.config must be a mapping of "
189
+ f"key/value pairs to deep-subset-match against the service's "
190
+ f"parsed config (got {type(code_host_section['config']).__name__})"
191
+ )
192
+ regex = section.get("regex")
193
+ if regex is not None:
194
+ if not isinstance(regex, str):
195
+ errors.append(f"{prefix}: repos.regex must be a string (got {type(regex).__name__})")
196
+ elif not regex:
197
+ errors.append(f"{prefix}: repos.regex is an empty string")
198
+ else:
199
+ try:
200
+ re.compile(regex)
201
+ except re.error as exception:
202
+ errors.append(f"{prefix}: repos.regex is not a valid Python regex: {exception}")
203
+ return errors
204
+
205
+
206
+ # ---------------------------------------------------------------------------
207
+ # Users resolution
208
+ # ---------------------------------------------------------------------------
209
+
210
+
211
def resolve_users(
    section: dict[str, object],
    all_users: list[shared_types.User],
    all_providers: list[shared_types.AuthProvider],
    saml_groups_attribute_names: saml_groups.SamlGroupsAttributeNameByProvider | None = None,
) -> list[shared_types.User]:
    """Return users matching ALL matchers under `users:` (intersection).

    The result follows the order in which user ids first appear in
    `all_users`, so repeated runs over the same input produce the same
    list (iterating the id set directly would depend on string hashing).

    Args:
        section: The rule's `users:` mapping (matcher key → matcher).
        all_users: Every user discovered on the instance.
        all_providers: Every auth provider discovered on the instance.
        saml_groups_attribute_names: Overrides the SAML assertion
            attribute name per (serviceID, clientID). Passed through to
            the `authProvider` resolver, which only consults it for the
            `authProvider.samlGroup` sub-field.

    Returns:
        The matching users. An empty section returns an empty list —
        `validate_mapping_rules` rejects this at config-load time, so
        that branch only fires for programmatic callers.

    Raises:
        ValueError: If `section` contains an unknown matcher key.
    """
    if not section:
        return []

    # If an id occurs more than once in `all_users`, the last entry wins.
    users_by_id: dict[str, shared_types.User] = {user["id"]: user for user in all_users}
    matched_ids: set[str] | None = None
    for key, matcher in section.items():
        if key != "authProvider":
            # validate_mapping_rules catches this earlier with a clearer
            # message; this only fires for programmatic callers.
            raise ValueError(f"unknown users matcher {key!r}")
        current_ids = {
            user["id"]
            for user in _users_matching_auth_provider(
                cast(permission_types.AuthProviderMatcher, matcher),
                all_users,
                all_providers,
                saml_groups_attribute_names,
            )
        }
        matched_ids = current_ids if matched_ids is None else matched_ids & current_ids
        if not matched_ids:
            # The intersection can only shrink; stop early.
            return []

    if matched_ids is None:
        # Unreachable for a non-empty section; kept as an explicit guard
        # rather than an `assert`, which is stripped under `-O`.
        return []
    return [user for user_id, user in users_by_id.items() if user_id in matched_ids]
254
+
255
+
256
def user_matches_users_section(
    section: dict[str, object],
    user: shared_types.User,
    all_providers: list[shared_types.AuthProvider],
    saml_groups_attribute_names: saml_groups.SamlGroupsAttributeNameByProvider | None = None,
) -> bool:
    """Tell whether a single user satisfies every matcher under `users:`.

    An empty section matches nobody. Matchers are checked in section
    order and the first failing one ends the check. An unknown matcher
    key raises `ValueError` when it is reached.
    """
    if not section:
        return False

    for matcher_name in section:
        if matcher_name != "authProvider":
            # validate_mapping_rules catches this earlier with a clearer
            # message; this only fires for programmatic callers.
            raise ValueError(f"unknown users matcher {matcher_name!r}")
        satisfied = _user_matches_auth_provider(
            cast(permission_types.AuthProviderMatcher, section[matcher_name]),
            user,
            all_providers,
            saml_groups_attribute_names,
        )
        if not satisfied:
            return False
    return True
280
+
281
+
282
def _users_matching_auth_provider(
    matcher: permission_types.AuthProviderMatcher,
    all_users: list[shared_types.User],
    all_providers: list[shared_types.AuthProvider],
    saml_groups_attribute_names: saml_groups.SamlGroupsAttributeNameByProvider | None,
) -> list[shared_types.User]:
    """Select the users an `authProvider:` matcher refers to.

    Providers are first filtered by the matcher's provider fields. With
    a `samlGroup` sub-field, only SAML providers are kept and a user is
    selected only when one of their accounts in such a provider lists
    that group. Without it, having any account in a matched provider is
    enough. Each user appears at most once in the result. Zero matched
    providers is logged as a warning and yields an empty list.
    """
    wanted_group = matcher.get("samlGroup")
    providers = _providers_matching(all_providers, matcher)
    if wanted_group:
        # Group claims only exist on SAML providers.
        providers = [
            candidate
            for candidate in providers
            if candidate["serviceType"] == saml_groups.SAML_SERVICE_TYPE
        ]

    if not providers:
        log.warning(
            " authProvider matcher matched zero providers (%s).",
            _format_matcher(cast(dict[str, object], matcher)),
        )
        return []

    for provider in providers:
        log.info(
            " authProvider → %s (type=%s serviceID=%s clientID=%s)",
            provider["displayName"],
            provider["serviceType"],
            provider["serviceID"],
            provider["clientID"],
        )

    # Keyed by user id so a user reachable through several providers is
    # returned once.
    selected: dict[str, shared_types.User] = {}
    for provider in providers:
        if wanted_group:
            attribute_name = saml_groups.attribute_name_for(
                saml_groups_attribute_names,
                provider["serviceID"],
                provider["clientID"],
            )
            selected.update(
                {
                    user["id"]: user
                    for user in all_users
                    if _user_has_saml_group_in_provider(
                        user, provider, wanted_group, attribute_name
                    )
                }
            )
        else:
            selected.update(
                {user["id"]: user for user in all_users if _user_has_account_in(user, provider)}
            )

    if wanted_group:
        log.info(
            " samlGroup → %d user(s) in group %r",
            len(selected),
            wanted_group,
        )
    return list(selected.values())
341
+
342
+
343
def _user_matches_auth_provider(
    matcher: permission_types.AuthProviderMatcher,
    user: shared_types.User,
    all_providers: list[shared_types.AuthProvider],
    saml_groups_attribute_names: saml_groups.SamlGroupsAttributeNameByProvider | None,
) -> bool:
    """Decide whether one user is selected by an `authProvider:` matcher.

    Single-user counterpart of `_users_matching_auth_provider`: same
    provider filtering and `samlGroup` narrowing, but it stops at the
    first provider that accepts the user and does no logging.
    """
    wanted_group = matcher.get("samlGroup")
    candidates = _providers_matching(all_providers, matcher)

    if not wanted_group:
        return any(_user_has_account_in(user, provider) for provider in candidates)

    # Group claims only exist on SAML providers.
    saml_candidates = [
        provider
        for provider in candidates
        if provider["serviceType"] == saml_groups.SAML_SERVICE_TYPE
    ]
    return any(
        _user_has_saml_group_in_provider(
            user,
            provider,
            wanted_group,
            saml_groups.attribute_name_for(
                saml_groups_attribute_names,
                provider["serviceID"],
                provider["clientID"],
            ),
        )
        for provider in saml_candidates
    )
373
+
374
+
375
def _providers_matching(
    providers: list[shared_types.AuthProvider],
    matcher: permission_types.AuthProviderMatcher,
) -> list[shared_types.AuthProvider]:
    """Keep the providers that agree with every supplied matcher field.

    Only the keys listed in `AUTH_PROVIDER_VALUE_MATCHES` take part;
    keys absent from the matcher impose no constraint. The matcher's
    `type` key is compared with the provider's `serviceType`; every
    other key has the same name on both sides. Input order is kept.
    """
    supplied = cast(Mapping[str, object], matcher)
    # (provider key, required value) for each field the matcher sets.
    constraints = [
        (provider_key, supplied[matcher_key])
        for matcher_key, provider_key in AUTH_PROVIDER_VALUE_MATCHES
        if matcher_key in supplied
    ]
    return [
        provider
        for provider in providers
        if all(
            required == cast(Mapping[str, object], provider)[provider_key]
            for provider_key, required in constraints
        )
    ]
395
+
396
+
397
+ def _user_has_account_in(user: shared_types.User, provider: shared_types.AuthProvider) -> bool:
398
+ """Return whether `user` has an account matching `provider`."""
399
+ if provider["serviceType"] == "builtin":
400
+ return bool(user.get("builtinAuth"))
401
+ for account in user["externalAccounts"]["nodes"]:
402
+ if (
403
+ account["serviceType"] == provider["serviceType"]
404
+ and account["serviceID"] == provider["serviceID"]
405
+ and account["clientID"] == provider["clientID"]
406
+ ):
407
+ return True
408
+ return False
409
+
410
+
411
+ def _user_has_saml_group_in_provider(
412
+ user: shared_types.User,
413
+ provider: shared_types.AuthProvider,
414
+ saml_group: str,
415
+ attribute_name: str,
416
+ ) -> bool:
417
+ """Return whether `user` has `saml_group` in one SAML provider account."""
418
+ for account in user["externalAccounts"]["nodes"]:
419
+ if (
420
+ account["serviceType"] == saml_groups.SAML_SERVICE_TYPE
421
+ and account["serviceID"] == provider["serviceID"]
422
+ and account["clientID"] == provider["clientID"]
423
+ and saml_group
424
+ in saml_groups.extract_saml_groups(account.get("accountData"), attribute_name)
425
+ ):
426
+ return True
427
+ return False
428
+
429
+
430
+ # ---------------------------------------------------------------------------
431
+ # Repos resolution
432
+ # ---------------------------------------------------------------------------
433
+
434
+
435
def resolve_repos(
    section: dict[str, object],
    services_by_id: dict[int, permission_types.ExternalService],
    repos_by_external_service_id: dict[int, list[permission_types.Repository]],
    all_repos_by_id: dict[str, permission_types.Repository],
) -> list[permission_types.Repository]:
    """Return repos matching ALL matchers under `repos:` (intersection).

    `codeHostConnection` is evaluated before `regex`, so the regex only
    has to scan repos that survived the first matcher. The result keeps
    the order in which repos were first produced by the matchers, so
    repeated runs over the same input produce the same list.

    Args:
        section: The rule's `repos:` mapping (matcher key → matcher).
        services_by_id: External services keyed by integer id.
        repos_by_external_service_id: Repos grouped by the integer id of
            the external service they belong to.
        all_repos_by_id: Every known repo keyed by repo id; the regex
            candidate pool when no other matcher ran first.

    Returns:
        The matching repos. An empty section returns an empty list;
        `validate_mapping_rules` rejects this at config-load time.

    Raises:
        ValueError: If `section` contains an unknown matcher key.
    """
    if not section:
        return []

    known_keys = ("codeHostConnection", "regex")
    for key in section:
        if key not in known_keys:
            # validate_mapping_rules catches this earlier with a clearer
            # message; this only fires for programmatic callers.
            raise ValueError(f"unknown repos matcher {key!r}")

    matched_ids: set[str] | None = None
    # Insertion-ordered id → repo index of everything a matcher returned.
    repo_index: dict[str, permission_types.Repository] = {}
    for key in known_keys:
        if key not in section:
            continue
        matcher = section[key]
        if key == "codeHostConnection":
            repos = _repos_matching_code_host_connection(
                cast(permission_types.CodeHostConnectionMatcher, matcher),
                services_by_id,
                repos_by_external_service_id,
            )
        else:
            if matched_ids is None:
                candidate_repos = list(all_repos_by_id.values())
            else:
                candidate_repos = [
                    repo for repo_id, repo in repo_index.items() if repo_id in matched_ids
                ]
            repos = _repos_matching_regex(cast(str, matcher), candidate_repos)

        current_ids = {repo["id"] for repo in repos}
        for repo in repos:
            repo_index[repo["id"]] = repo
        matched_ids = current_ids if matched_ids is None else matched_ids & current_ids
        if not matched_ids:
            # The intersection can only shrink; stop early.
            return []

    if matched_ids is None:
        # Unreachable: a non-empty section of known keys runs the loop
        # body at least once. Explicit guard instead of an `assert`.
        return []
    return [repo for repo_id, repo in repo_index.items() if repo_id in matched_ids]
479
+
480
+
481
def _repos_matching_code_host_connection(
    matcher: permission_types.CodeHostConnectionMatcher,
    services_by_id: dict[int, permission_types.ExternalService],
    repos_by_external_service_id: dict[int, list[permission_types.Repository]],
) -> list[permission_types.Repository]:
    """Collect the repos of every external service selected by `matcher`.

    Each selected service is logged. Its repos are looked up in
    `repos_by_external_service_id` under the service's decoded integer
    id. A repo reachable through several services is returned once.
    Zero selected services is logged as a warning and yields an empty
    list.
    """
    services = _services_matching(services_by_id, matcher)
    if not services:
        log.warning(
            " codeHostConnection matcher matched zero services (%s).",
            _format_matcher(cast(dict[str, object], matcher)),
        )
        return []

    repos_by_repo_id: dict[str, permission_types.Repository] = {}
    for service in services:
        log.info(
            " codeHostConnection → %s (id=%d kind=%s)",
            service["displayName"],
            id_codec.decode_external_service_id(service["id"]),
            service["kind"],
        )
        service_key = id_codec.decode_external_service_id(service["id"])
        service_repos = repos_by_external_service_id.get(service_key, [])
        repos_by_repo_id.update((repo["id"], repo) for repo in service_repos)
    return list(repos_by_repo_id.values())
505
+
506
+
507
def _repos_matching_regex(
    pattern: str, repos: list[permission_types.Repository]
) -> list[permission_types.Repository]:
    """Filter `repos` down to those whose name is found by `pattern`.

    The pattern is a Python `re` expression applied with `search`. It is
    tried against the bare repo name and against the name prefixed with
    `https://`, so URL-looking operator patterns stay useful. The number
    of hits is logged.
    """
    matcher = re.compile(pattern)
    hits: list[permission_types.Repository] = []
    for repo in repos:
        name = repo["name"]
        if matcher.search(name) is not None or matcher.search(f"https://{name}") is not None:
            hits.append(repo)
    log.info(" regex → %d repo(s) matched %r", len(hits), pattern)
    return hits
524
+
525
+
526
+ def _services_matching(
527
+ services_by_id: dict[int, permission_types.ExternalService],
528
+ matcher: permission_types.CodeHostConnectionMatcher,
529
+ ) -> list[permission_types.ExternalService]:
530
+ """AND across the supplied matcher fields. If `id` is supplied we
531
+ short-circuit to a single candidate; remaining fields then act as a
532
+ defensive cross-check against an ES recreated/renamed under the
533
+ same id. Without `id`, every other supplied field is a primary
534
+ discriminator across the full service list.
535
+ """
536
+ if "id" in matcher:
537
+ single_service = services_by_id.get(matcher["id"])
538
+ if single_service is None:
539
+ return []
540
+ candidates = [single_service]
541
+ else:
542
+ candidates = list(services_by_id.values())
543
+
544
+ matched: list[permission_types.ExternalService] = []
545
+ matcher_values = cast(Mapping[str, object], matcher)
546
+ for service in candidates:
547
+ service_values = cast(Mapping[str, object], service)
548
+ if not all(
549
+ field_name not in matcher_values
550
+ or matcher_values[field_name] == service_values[field_name]
551
+ for field_name in CODE_HOST_VALUE_MATCHES
552
+ ):
553
+ continue
554
+ if "config" in matcher and not _config_subset_matches(
555
+ matcher["config"], _parsed_service_config(service)
556
+ ):
557
+ continue
558
+ matched.append(service)
559
+ return matched
560
+
561
+
562
+ def _parsed_service_config(service: permission_types.ExternalService) -> dict[str, Any]:
563
+ """Best-effort parse of `ExternalService.config` (JSONC string).
564
+
565
+ Returns an empty dict if the config is missing or unparseable —
566
+ callers treat that as "no keys to match against", so a `config:`
567
+ matcher against such a service simply fails to match instead of
568
+ raising. Sourcegraph's resolver returns a JSON object string, so
569
+ parse failures here are anomalies worth not crashing on.
570
+ """
571
+ raw_config = service.get("config")
572
+ if not raw_config:
573
+ return {}
574
+ try:
575
+ parsed = cast(Any, json5.loads(raw_config))
576
+ except ValueError:
577
+ return {}
578
+ if not isinstance(parsed, dict):
579
+ return {}
580
+ return cast(dict[str, Any], parsed)
581
+
582
+
583
+ def _config_subset_matches(matcher_config: dict[str, Any], service_config: dict[str, Any]) -> bool:
584
+ """True iff every key in `matcher_config` is present in `service_config`
585
+ with a matching value. Nested dicts are matched recursively
586
+ (subset semantics); lists and scalars are matched by equality.
587
+
588
+ Sourcegraph's `REDACTED` sentinel is left as-is on the service side:
589
+ a matcher that names a redacted key (e.g. `token`) compares against
590
+ the literal `"REDACTED"` string and almost certainly fails to
591
+ match — exactly the semantics we want, since the operator can't
592
+ have known the real secret value.
593
+ """
594
+ for key, expected in matcher_config.items():
595
+ if key not in service_config:
596
+ return False
597
+ actual = service_config[key]
598
+ if isinstance(expected, dict) and isinstance(actual, dict):
599
+ if not _config_subset_matches(
600
+ cast(dict[str, Any], expected), cast(dict[str, Any], actual)
601
+ ):
602
+ return False
603
+ continue
604
+ if expected != actual:
605
+ return False
606
+ return True
607
+
608
+
609
def referenced_external_service_ids(rules: list[permission_types.MappingRule]) -> set[int]:
    """Gather every `repos.codeHostConnection.id` value named by `rules`.

    Rules with no `repos:` section, no `codeHostConnection:` matcher, or
    a matcher without `id` contribute nothing. The values are returned
    exactly as written in the rules (the integer, YAML-facing form).
    """

    def _connections():
        # Yield each rule's non-empty codeHostConnection matcher.
        for rule in rules:
            connection = (rule.get("repos") or {}).get("codeHostConnection")
            if connection:
                yield connection

    return {connection["id"] for connection in _connections() if "id" in connection}
623
+
624
+
625
+ def _format_matcher(matcher: dict[str, object]) -> str:
626
+ """Render a matcher dict as `key1=value1 key2=value2` for log output."""
627
+ return " ".join(f"{key}={value!r}" for key, value in matcher.items())