updates2mqtt 1.7.0__py3-none-any.whl → 1.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,23 +1,331 @@
  import re
- from typing import Any
+ import typing
+ from abc import abstractmethod
+ from typing import Any, cast

  import structlog
  from docker.auth import resolve_repository_name
- from hishel.httpx import SyncCacheClient
+ from docker.models.containers import Container
  from httpx import Response
  from omegaconf import MissingMandatoryValue, OmegaConf, ValidationError

+ from updates2mqtt.helpers import APIStatsCounter, CacheMetadata, ThrottledError, Throttler, fetch_url, validate_url
+ from updates2mqtt.model import DiscoveryArtefactDetail, DiscoveryInstallationDetail, ReleaseDetail
+
+ if typing.TYPE_CHECKING:
+     from docker.models.images import RegistryData
+ from http import HTTPStatus
+
+ import docker
+ import docker.errors
+
  from updates2mqtt.config import (
-     NO_KNOWN_IMAGE,
      PKG_INFO_FILE,
      DockerConfig,
      DockerPackageUpdateInfo,
      PackageUpdateInfo,
+     RegistryConfig,
      UpdateInfoConfig,
  )

  log = structlog.get_logger()

+ SOURCE_PLATFORM_GITHUB = "GitHub"
+ SOURCE_PLATFORM_CODEBERG = "CodeBerg"
+ SOURCE_PLATFORMS = {SOURCE_PLATFORM_GITHUB: r"https://github.com/.*"}
+ DIFF_URL_TEMPLATES = {
+     SOURCE_PLATFORM_GITHUB: "{repo}/commit/{revision}",
+ }
+ RELEASE_URL_TEMPLATES = {SOURCE_PLATFORM_GITHUB: "{repo}/releases/tag/{version}"}
+ UNKNOWN_RELEASE_URL_TEMPLATES = {SOURCE_PLATFORM_GITHUB: "{repo}/releases"}
+ MISSING_VAL = "**MISSING**"
+ UNKNOWN_REGISTRY = "**UNKNOWN_REGISTRY**"
+
+ HEADER_DOCKER_DIGEST = "docker-content-digest"
+ HEADER_DOCKER_API = "docker-distribution-api-version"
+
+ TOKEN_URL_TEMPLATE = "https://{auth_host}/token?scope=repository:{image_name}:pull&service={service}" # noqa: S105 # nosec
+ REGISTRIES = {
+     # registry: (auth_host, api_host, service, url_template)
+     "docker.io": ("auth.docker.io", "registry-1.docker.io", "registry.docker.io", TOKEN_URL_TEMPLATE),
+     "mcr.microsoft.com": (None, "mcr.microsoft.com", "mcr.microsoft.com", TOKEN_URL_TEMPLATE),
+     "ghcr.io": ("ghcr.io", "ghcr.io", "ghcr.io", TOKEN_URL_TEMPLATE),
+     "lscr.io": ("ghcr.io", "lscr.io", "ghcr.io", TOKEN_URL_TEMPLATE),
+     "codeberg.org": ("codeberg.org", "codeberg.org", "container_registry", TOKEN_URL_TEMPLATE),
+     "registry.gitlab.com": (
+         "www.gitlab.com",
+         "registry.gitlab.com",
+         "container_registry",
+         "https://{auth_host}/jwt/auth?service={service}&scope=repository:{image_name}:pull&offline_token=true&client_id=docker",
+     ),
+ }
+
+ # source: https://specs.opencontainers.org/distribution-spec/?v=v1.0.0#pull
+ OCI_NAME_RE = r"[a-z0-9]+((\.|_|__|-+)[a-z0-9]+)*(\/[a-z0-9]+((\.|_|__|-+)[a-z0-9]+)*)*"
+ OCI_TAG_RE = r"[a-zA-Z0-9_][a-zA-Z0-9._-]{0,127}"
+
+
+ class DockerImageInfo(DiscoveryArtefactDetail):
+     """Normalize and shlep around the bits of an image def
+
+     index_name: aka index_name, e.g. ghcr.io
+     name: image ref without index name or tag, e.g. nginx, or librenms/librenms
+     tag: tag or digest
+     untagged_ref: combined index name and package name
+     """
+
+     def __init__(
+         self,
+         ref: str, # ref with optional index name and tag or digest, index:name:tag_or_digest
+         image_digest: str | None = None,
+         tags: list[str] | None = None,
+         attributes: dict[str, Any] | None = None,
+         annotations: dict[str, Any] | None = None,
+         platform: str | None = None, # test harness simplification
+         version: str | None = None, # test harness simplification
+     ) -> None:
+         self.ref: str = ref
+         self.version: str | None = version
+         self.image_digest: str | None = image_digest
+         self.short_digest: str | None = None
+         self.repo_digest: str | None = None # the single RepoDigest known to match registry
+         self.git_digest: str | None = None
+         self.index_name: str | None = None
+         self.name: str | None = None
+         self.tag: str | None = None
+         self.pinned_digest: str | None = None
+         # untagged ref using combined index and remote name used only for pattern matching common pkg info
+         self.untagged_ref: str | None = None # index_name/remote_name used for pkg match
+         self.tag_or_digest: str | None = None # index_name/remote_name:**tag_or_digest**
+         self.tags = tags
+         self.attributes: dict[str, Any] = attributes or {}
+         self.annotations: dict[str, Any] = annotations or {}
+         self.throttled: bool = False
+         self.origin: str | None = None
+         self.error: str | None = None
+         self.platform: str | None = platform
+         self.custom: dict[str, str | float | int | bool | None] = {}
+
+         self.local_build: bool = not self.repo_digests
+         self.index_name, remote_name = resolve_repository_name(ref)
+
+         self.name = remote_name
+
+         if remote_name and ":" in remote_name and ("@" not in remote_name or remote_name.index("@") > remote_name.index(":")):
+             # name:tag format
+             self.name, self.tag_or_digest = remote_name.split(":", 1)
+             self.untagged_ref = ref.split(":", 1)[0]
+             self.tag = self.tag_or_digest
+
+         elif remote_name and "@" in remote_name:
+             # name@digest format
+             self.name, self.tag_or_digest = remote_name.split("@", 1)
+             self.untagged_ref = ref.split("@", 1)[0]
+             self.pinned_digest = self.tag_or_digest
+
+         if self.tag and "@" in self.tag:
+             # name:tag@digest format
+             # for pinned tags, care only about the digest part
+             self.tag, self.tag_or_digest = self.tag.split("@", 1)
+             self.pinned_digest = self.tag_or_digest
+         if self.tag_or_digest is None:
+             self.tag_or_digest = "latest"
+             self.untagged_ref = ref
+             self.tag = self.tag_or_digest
+
+         if self.repo_digest is None and len(self.repo_digests) == 1:
+             # definite known RepoDigest
+             # if its ambiguous, the final version selection will handle it
+             self.repo_digest = self.repo_digests[0]
+
+         if self.index_name == "docker.io" and "/" not in self.name:
+             # "official Docker images have an abbreviated library/foo name"
+             self.name = f"library/{self.name}"
+         if self.name is not None and not re.match(OCI_NAME_RE, self.name):
+             log.warning("Invalid OCI image name: %s", self.name)
+         if self.tag and not re.match(OCI_TAG_RE, self.tag):
+             log.warning("Invalid OCI image tag: %s", self.tag)
+
+         if self.os and self.arch:
+             self.platform = "/".join(
+                 filter(
+                     None,
+                     [self.os, self.arch, self.variant],
+                 ),
+             )
+
+         if self.image_digest is not None:
+             self.image_digest = self.condense_digest(self.image_digest, short=False)
+             self.short_digest = self.condense_digest(self.image_digest) # type: ignore[arg-type]
+
+     @property
+     def repo_digests(self) -> list[str]:
+         if self.repo_digest:
+             return [self.repo_digest]
+         # RepoDigest in image inspect, Registry Config object
+         digests = [v.split("@", 1)[1] if "@" in v else v for v in self.attributes.get("RepoDigests", [])]
+         return digests or []
+
+     @property
+     def pinned(self) -> bool:
+         """Check if this is pinned and installed version consistent with pin"""
+         return bool(self.pinned_digest and self.pinned_digest in self.repo_digests)
+
+     @property
+     def os(self) -> str | None:
+         return self.attributes.get("Os")
+
+     @property
+     def arch(self) -> str | None:
+         return self.attributes.get("Architecture")
+
+     @property
+     def variant(self) -> str | None:
+         return self.attributes.get("Variant")
+
+     def condense_digest(self, digest: str, short: bool = True) -> str | None:
+         try:
+             digest = digest.split("@")[1] if "@" in digest else digest # fully qualified RepoDigest
+             if short:
+                 digest = digest.split(":")[1] if ":" in digest else digest # remove digest type prefix
+                 return digest[0:12]
+             return digest
+         except Exception:
+             return None
+
+     def reuse(self) -> "DockerImageInfo":
+         cloned = DockerImageInfo(self.ref, self.image_digest, self.tags, self.attributes, self.annotations, self.version)
+         cloned.origin = "REUSED"
+         return cloned
+
+     def as_dict(self, minimal: bool = True) -> dict[str, str | list | dict | bool | int | None]:
+         result: dict[str, str | list | dict | bool | int | None] = {
+             "image_ref": self.ref,
+             "name": self.name,
+             "version": self.version,
+             "image_digest": self.image_digest,
+             "repo_digest": self.repo_digest,
+             "repo_digests": self.repo_digest,
+             "git_digest": self.git_digest,
+             "index_name": self.index_name,
+             "tag": self.tag,
+             "pinned_digest": self.pinned_digest,
+             "tag_or_digest": self.tag_or_digest,
+             "tags": self.tags,
+             "origin": self.origin,
+             "platform": self.platform,
+             "local_build": self.local_build,
+             "error": self.error,
+             "throttled": self.throttled,
+             "custom": self.custom,
+         }
+         if not minimal:
+             result["attributes"] = self.attributes
+             result["annotations"] = self.annotations
+         return result
+
+
+ def id_source_platform(source: str | None) -> str | None:
+     candidates: list[str] = [platform for platform, pattern in SOURCE_PLATFORMS.items() if re.match(pattern, source or "")]
+     return candidates[0] if candidates else None
+
+
+ def _select_annotation(
+     name: str, key: str, local_info: DockerImageInfo | None = None, registry_info: DockerImageInfo | None = None
+ ) -> dict[str, str | None]:
+     result: dict[str, str | None] = {}
+     if registry_info:
+         v: Any | None = registry_info.annotations.get(key)
+         if v is not None:
+             result[name] = v
+     elif local_info:
+         v = local_info.annotations.get(key)
+         if v is not None:
+             result[name] = v
+     return result
+
+
+ def cherrypick_annotations(
+     local_info: DockerImageInfo | None, registry_info: DockerImageInfo | None
+ ) -> dict[str, str | float | int | bool | None]:
+     """https://github.com/opencontainers/image-spec/blob/main/annotations.md"""
+     results: dict[str, str | float | int | bool | None] = {}
+     for either_name, either_label in [
+         ("documentation_url", "org.opencontainers.image.documentation"),
+         ("description", "org.opencontainers.image.description"),
+         ("licences", "org.opencontainers.image.licenses"),
+         ("image_base", "org.opencontainers.image.base.name"),
+         ("image_created", "org.opencontainers.image.created"),
+         ("image_version", "org.opencontainers.image.version"),
+         ("image_revision", "org.opencontainers.image.revision"),
+         ("title", "org.opencontainers.image.title"),
+         ("vendor", "org.opencontainers.image.vendor"),
+         ("source", "org.opencontainers.image.source"),
+     ]:
+         results.update(_select_annotation(either_name, either_label, local_info, registry_info))
+     return results
+
+
+ class DockerServiceDetails(DiscoveryInstallationDetail):
+     def __init__(
+         self,
+         container_name: str | None = None,
+         compose_path: str | None = None,
+         compose_version: str | None = None,
+         compose_service: str | None = None,
+         git_repo_path: str | None = None,
+     ) -> None:
+         self.container_name: str | None = container_name
+         self.compose_path: str | None = compose_path
+         self.compose_version: str | None = compose_version
+         self.compose_service: str | None = compose_service
+         self.git_repo_path: str | None = git_repo_path
+         self.git_local_timestamp: str | None = None
+
+     def as_dict(self) -> dict[str, str | list | dict | bool | int | None]:
+         results: dict[str, str | list | dict | bool | int | None] = {
+             "container_name": self.container_name,
+             "compose_path": self.compose_path,
+             "compose_service": self.compose_service,
+             "compose_version": self.compose_version,
+         }
+         if self.git_local_timestamp:
+             results["git_local_timestamp"] = self.git_local_timestamp
+         if self.git_repo_path:
+             results["git_repo_path"] = self.git_repo_path
+         return results
+
+
+ class LocalContainerInfo:
+     def build_image_info(self, container: Container) -> tuple[DockerImageInfo, DockerServiceDetails]:
+         """Image contents equiv to `docker inspect image <image_ref>`"""
+         # container image can be none if someone ran `docker rmi -f`
+         # so although this could be sourced from image, like `container.image.tags[0]`
+         # use the container ref instead, which survives monkeying about with images
+         image_ref: str = container.attrs.get("Config", {}).get("Image") or ""
+         image_digest = container.attrs.get("Image")
+
+         image_info: DockerImageInfo = DockerImageInfo(
+             image_ref,
+             image_digest=image_digest,
+             tags=container.image.tags if container and container.image else None,
+             annotations=container.image.labels if container.image else None,
+             attributes=container.image.attrs if container.image else None,
+         )
+         service_info: DockerServiceDetails = DockerServiceDetails(
+             container.name,
+             compose_path=container.labels.get("com.docker.compose.project.working_dir"),
+             compose_service=container.labels.get("com.docker.compose.service"),
+             compose_version=container.labels.get("com.docker.compose.version"),
+         )
+
+         labels: dict[str, str | float | int | bool | None] = cherrypick_annotations(image_info, None)
+         # capture container labels/annotations, not image ones
+         labels = labels or {}
+         image_info.custom = labels
+         image_info.version = cast("str|None", labels.get("image_version"))
+         return image_info, service_info
+

  class PackageEnricher:
      def __init__(self, docker_cfg: DockerConfig) -> None:
@@ -28,19 +336,19 @@ class PackageEnricher:
      def initialize(self) -> None:
          pass

-     def enrich(self, image_name: str | None, image_ref: str | None, log: Any) -> PackageUpdateInfo | None:
+     def enrich(self, image_info: DockerImageInfo) -> PackageUpdateInfo | None:
          def match(pkg: PackageUpdateInfo) -> bool:
              if pkg is not None and pkg.docker is not None and pkg.docker.image_name is not None:
-                 if image_name is not None and image_name == pkg.docker.image_name:
+                 if image_info.untagged_ref is not None and image_info.untagged_ref == pkg.docker.image_name:
                      return True
-                 if image_ref is not None and image_ref == pkg.docker.image_name:
+                 if image_info.ref is not None and image_info.ref == pkg.docker.image_name:
                      return True
              return False

-         if image_name is not None and image_ref is not None:
+         if image_info.untagged_ref is not None and image_info.ref is not None:
              for pkg in self.pkgs.values():
                  if match(pkg):
-                     log.debug(
+                     self.log.debug(
                          "Found common package",
                          image_name=pkg.docker.image_name, # type: ignore [union-attr]
                          logo_url=pkg.logo_url,
@@ -51,10 +359,10 @@ class PackageEnricher:


  class DefaultPackageEnricher(PackageEnricher):
-     def enrich(self, image_name: str | None, image_ref: str | None, log: Any) -> PackageUpdateInfo | None:
-         log.debug("Default pkg info", image_name=image_name, image_ref=image_ref)
+     def enrich(self, image_info: DockerImageInfo) -> PackageUpdateInfo | None:
+         self.log.debug("Default pkg info", image_name=image_info.untagged_ref, image_ref=image_info.ref)
          return PackageUpdateInfo(
-             DockerPackageUpdateInfo(image_name or NO_KNOWN_IMAGE),
+             DockerPackageUpdateInfo(image_info.untagged_ref or image_info.ref),
              logo_url=self.cfg.default_entity_picture_url,
              release_notes_url=None,
          )
@@ -84,19 +392,15 @@ class LinuxServerIOPackageEnricher(PackageEnricher):
          if cfg is None or not cfg.enabled:
              return

-         try:
-             with SyncCacheClient(headers=[("cache-control", f"max-age={cfg.cache_ttl}")]) as client:
-                 log.debug(f"Fetching linuxserver.io metadata from API, cache_ttl={cfg.cache_ttl}")
-                 response: Response = client.get(
-                     "https://api.linuxserver.io/api/v1/images?include_config=false&include_deprecated=false"
-                 )
-                 if response.status_code != 200:
-                     log.error("Failed to fetch linuxserver.io metadata, non-200 response", status_code=response.status_code)
-                     return
-                 api_data: Any = response.json()
-                 repos: list = api_data.get("data", {}).get("repositories", {}).get("linuxserver", [])
-         except Exception:
-             log.exception("Failed to fetch linuxserver.io metadata")
+         log.debug(f"Fetching linuxserver.io metadata from API, cache_ttl={cfg.cache_ttl}")
+         response: Response | None = fetch_url(
+             "https://api.linuxserver.io/api/v1/images?include_config=false&include_deprecated=false",
+             cache_ttl=cfg.cache_ttl,
+         )
+         if response and response.is_success:
+             api_data: Any = response.json()
+             repos: list = api_data.get("data", {}).get("repositories", {}).get("linuxserver", [])
+         else:
              return

          added = 0
@@ -113,232 +417,460 @@ class LinuxServerIOPackageEnricher(PackageEnricher):
          log.info(f"Added {added} linuxserver.io package details")


- def fetch_url(
-     url: str, cache_ttl: int = 300, bearer_token: str | None = None, response_type: str | None = None
- ) -> Response | None:
-     try:
-         headers = [("cache-control", f"max-age={cache_ttl}")]
-         if bearer_token:
-             headers.append(("Authorization", f"Bearer {bearer_token}"))
-         if response_type:
-             headers.append(("Accept", response_type))
-         with SyncCacheClient(headers=headers) as client:
-             log.debug(f"Fetching URL {url}, cache_ttl={cache_ttl}")
-             response: Response = client.get(url)
-             if not response.is_success:
-                 log.debug("URL %s fetch returned non-success status: %s", url, response.status_code)
-             return response
-     except Exception as e:
-         log.debug("URL %s failed to fetch: %s", url, e)
-         return None
-
-
- def validate_url(url: str, cache_ttl: int = 300) -> bool:
-     response: Response | None = fetch_url(url, cache_ttl=cache_ttl)
-     return response is not None and response.is_success
+ class SourceReleaseEnricher:
+     def __init__(self) -> None:
+         self.log: Any = structlog.get_logger().bind(integration="docker")

+     def enrich(
+         self, registry_info: DockerImageInfo, source_repo_url: str | None = None, notes_url: str | None = None
+     ) -> ReleaseDetail | None:
+         if not registry_info.annotations and not source_repo_url and not notes_url:
+             return None

- SOURCE_PLATFORM_GITHUB = "GitHub"
- SOURCE_PLATFORMS = {SOURCE_PLATFORM_GITHUB: r"https://github.com/.*"}
- DIFF_URL_TEMPLATES = {
-     SOURCE_PLATFORM_GITHUB: "{source}/commit/{revision}",
- }
- RELEASE_URL_TEMPLATES = {SOURCE_PLATFORM_GITHUB: "{source}/releases/tag/{version}"}
+         detail = ReleaseDetail()

+         detail.notes_url = notes_url
+         detail.version = registry_info.annotations.get("org.opencontainers.image.version")
+         detail.revision = registry_info.annotations.get("org.opencontainers.image.revision")
+         detail.source_url = registry_info.annotations.get("org.opencontainers.image.source") or source_repo_url

- class SourceReleaseEnricher:
-     def __init__(self) -> None:
-         self.log: Any = structlog.get_logger().bind(integration="docker")
+         if detail.source_url and "#" in detail.source_url:
+             detail.source_repo_url = detail.source_url.split("#", 1)[0]
+             self.log.debug("Simplifying %s from %s", detail.source_repo_url, detail.source_url)
+         else:
+             detail.source_repo_url = detail.source_url
+
+         detail.source_platform = id_source_platform(detail.source_repo_url)
+         if not detail.source_platform:
+             self.log.debug("No known source platform found on container", source=detail.source_repo_url)
+             return detail
+
+         template_vars: dict[str, str | None] = {
+             "version": detail.version or MISSING_VAL,
+             "revision": detail.revision or MISSING_VAL,
+             "repo": detail.source_repo_url or MISSING_VAL,
+             "source": detail.source_url or MISSING_VAL,
+         }
+
+         diff_url: str | None = DIFF_URL_TEMPLATES[detail.source_platform].format(**template_vars)
+         if diff_url and MISSING_VAL not in diff_url and validate_url(diff_url):
+             detail.diff_url = diff_url
+         else:
+             diff_url = None

-     def enrich(self, annotations: dict[str, str]) -> dict[str, str]:
-         results: dict[str, str] = {}
-         image_version: str | None = annotations.get("org.opencontainers.image.version")
-         image_digest: str | None = annotations.get("org.opencontainers.image.revision")
-         source = annotations.get("org.opencontainers.image.source")
-         source_platforms = [platform for platform, pattern in SOURCE_PLATFORMS.items() if re.match(pattern, source or "")]
-         if not source_platforms:
-             self.log.debug("No known source platform found on container", source=source)
-             return results
-         source_platform = source_platforms[0]
-
-         if source:
-             template_vars: dict[str, str | None] = {
-                 "source": source,
-                 "version": image_version,
-                 "revision": image_digest,
-             }
-             diff_url = DIFF_URL_TEMPLATES[source_platform].format(**template_vars)
-             if validate_url(diff_url):
-                 results["diff_url"] = diff_url
+         if detail.notes_url is None:
+             detail.notes_url = RELEASE_URL_TEMPLATES[detail.source_platform].format(**template_vars)

-             release_url = RELEASE_URL_TEMPLATES[source_platform].format(**template_vars)
+         if MISSING_VAL in detail.notes_url or not validate_url(detail.notes_url):
+             detail.notes_url = UNKNOWN_RELEASE_URL_TEMPLATES[detail.source_platform].format(**template_vars)
+             if MISSING_VAL in detail.notes_url or not validate_url(detail.notes_url):
+                 detail.notes_url = None

-             if validate_url(release_url):
-                 results["release_url"] = release_url
+         if detail.source_platform == SOURCE_PLATFORM_GITHUB and detail.source_repo_url:
+             base_api = detail.source_repo_url.replace("https://github.com", "https://api.github.com/repos")

-         if source_platform == SOURCE_PLATFORM_GITHUB and source:
-             base_api = source.replace("https://github.com", "https://api.github.com/repos")
-             api_response: Response | None = fetch_url(f"{base_api}/releases/tags/{image_version}")
+             api_response: Response | None = fetch_url(f"{base_api}/releases/tags/{detail.version}")
              if api_response and api_response.is_success:
                  api_results: Any = httpx_json_content(api_response, {})
-                 results["release_summary"] = api_results.get("body") # ty:ignore[possibly-missing-attribute]
+                 detail.summary = api_results.get("body") # ty:ignore[possibly-missing-attribute]
                  reactions = api_results.get("reactions") # ty:ignore[possibly-missing-attribute]
                  if reactions:
-                     results["net_score"] = reactions.get("+1", 0) - reactions.get("-1", 0)
+                     detail.net_score = reactions.get("+1", 0) - reactions.get("-1", 0)
              else:
                  self.log.debug(
                      "Failed to fetch GitHub release info",
-                     url=f"{base_api}/releases/tags/{image_version}",
+                     url=f"{base_api}/releases/tags/{detail.version}",
                      status_code=(api_response and api_response.status_code) or None,
                  )
-         return results
+         if not detail.summary and detail.diff_url:
+             detail.summary = f"<a href='{detail.diff_url}'>{detail.version or detail.revision} Diff</a>"
+         return detail


  class AuthError(Exception):
      pass


- REGISTRIES = {
-     # registry: (auth_host, api_host, service)
-     "docker.io": ("auth.docker.io", "registry-1.docker.io", "registry.docker.io"),
-     "mcr.microsoft.com": (None, "mcr.microsoft.com", "mcr.microsoft.com"),
-     "ghcr.io": ("ghcr.io", "ghcr.io", "ghcr.io"),
-     "lscr.io": ("ghcr.io", "lscr.io", "ghcr.io"),
-     "codeberg.org": ("codeberg.org", "codeberg.org", "container_registry"),
- }
-
-
  def httpx_json_content(response: Response, default: Any = None) -> Any | None:
-     if response and "json" in response.headers.get("content-type"):
+     if response and "json" in response.headers.get("content-type", ""):
          try:
              return response.json()
          except Exception:
              log.debug("Failed to parse JSON response: %s", response.text)
+     elif response and response.headers.get("content-type", "") == "application/octet-stream":
+         # blob could return a gzip layer tarball, however assumed only index, manifest or config requested
+         try:
+             return response.json()
+         except Exception:
+             log.debug("Failed to parse assumed JSON response: %s", response.text)
      return default


- class LabelEnricher:
+ class VersionLookup:
      def __init__(self) -> None:
-         self.log: Any = structlog.get_logger().bind(integration="docker")
+         self.log: Any = structlog.get_logger().bind(integration="docker", tool="version_lookup")
+
+     @abstractmethod
+     def lookup(self, local_image_info: DockerImageInfo, **kwargs) -> DockerImageInfo: # noqa: ANN003
+         pass
+
+
+ class ContainerDistributionAPIVersionLookup(VersionLookup):
+     def __init__(self, throttler: Throttler, cfg: RegistryConfig) -> None:
+         self.throttler: Throttler = throttler
+         self.cfg: RegistryConfig = cfg
+         self.log: Any = structlog.get_logger().bind(integration="docker", tool="version_lookup")
+         self.api_stats = APIStatsCounter()
+
+     def fetch_token(self, registry: str, image_name: str) -> str | None:
+         default_host: tuple[str, str, str, str] = (registry, registry, registry, TOKEN_URL_TEMPLATE)
+         auth_host: str | None = REGISTRIES.get(registry, default_host)[0]
+         if auth_host is None:
+             return None
+
+         service: str = REGISTRIES.get(registry, default_host)[2]
+         url_template: str = REGISTRIES.get(registry, default_host)[3]
+         auth_url: str = url_template.format(auth_host=auth_host, image_name=image_name, service=service)
+         response: Response | None = fetch_url(
+             auth_url, cache_ttl=self.cfg.token_cache_ttl, follow_redirects=True, api_stats_counter=self.api_stats
+         )

-     def fetch_token(self, auth_host: str, service: str, image_name: str) -> str | None:
-         logger = self.log.bind(image_name=image_name, action="auth_registry")
-         auth_url: str = f"https://{auth_host}/token?scope=repository:{image_name}:pull&service={service}"
-         response: Response | None = fetch_url(auth_url, cache_ttl=30)
          if response and response.is_success:
              api_data = httpx_json_content(response, {})
              token: str | None = api_data.get("token") if api_data else None
              if token:
                  return token
-             logger.warning("No token found in response")
+             self.log.warning("No token found in response for %s", auth_url)
              raise AuthError(f"No token found in response for {image_name}")

-         logger.debug(
-             "Non-success response fetching token: %s",
+         self.log.debug(
+             "Non-success response at %s fetching token: %s",
+             auth_url,
              (response and response.status_code) or None,
          )
          if response and response.status_code == 404:
-             response = fetch_url(f"https://{auth_host}/v2/")
+             self.log.debug(
+                 "Default token URL %s not found, calling /v2 endpoint to validate OCI API and provoke auth", auth_url
+             )
+             response = fetch_url(
+                 f"https://{auth_host}/v2",
+                 follow_redirects=True,
+                 allow_stale=False,
+                 cache_ttl=0,
+                 api_stats_counter=self.api_stats,
+             )
+
          if response and response.status_code == 401:
              auth = response.headers.get("www-authenticate")
              if not auth:
-                 logger.debug("No www-authenticate header found in 401 response")
+                 self.log.warning("No www-authenticate header found in 401 response for %s", auth_url)
                  raise AuthError(f"No www-authenticate header found on 401 for {image_name}")
              match = re.search(r'realm="([^"]+)",service="([^"]+)",scope="([^"]+)"', auth)
              if not match:
-                 logger.debug("No realm/service/scope found in www-authenticate header")
+                 self.log.warning("No realm/service/scope found in www-authenticate header for %s", auth_url)
                  raise AuthError(f"No realm/service/scope found on 401 headers for {image_name}")

              realm, service, scope = match.groups()
              auth_url = f"{realm}?service={service}&scope={scope}"
-             response = fetch_url(auth_url)
+             response = fetch_url(auth_url, follow_redirects=True, api_stats_counter=self.api_stats)
+
              if response and response.is_success:
                  token_data = response.json()
-                 logger.debug("Fetched registry token")
+                 self.log.debug("Fetched registry token from %s", auth_url)
                  return token_data.get("token")
+             self.log.warning(
+                 "Alternative auth %s with status %s has no token", auth_url, (response and response.status_code) or None
+             )
+         elif response:
+             self.log.warning("Auth %s failed with status %s", auth_url, (response and response.status_code) or None)

-         logger.debug("Failed to fetch registry token")
-         raise AuthError(f"Failed to fetch token for {image_name}")
+         raise AuthError(f"Failed to fetch token for {image_name} at {auth_url}")

-     def fetch_annotations(
-         self,
-         image_ref: str,
-         os: str,
-         arch: str,
-         token: str | None = None,
-         mutable_cache_ttl: int = 600,
-         immutable_cache_ttl: int = 86400,
-     ) -> dict[str, str]:
-         logger = self.log.bind(image_ref=image_ref, action="enrich_registry")
-         annotations: dict[str, str] = {}
-         if token:
-             logger.debug("Using provided token to fetch manifest for image %s", image_ref)
-         registry, ref = resolve_repository_name(image_ref)
-         default_host = (registry, registry, registry)
-         auth_host: str | None = REGISTRIES.get(registry, default_host)[0]
-         api_host: str | None = REGISTRIES.get(registry, default_host)[1]
-         service: str = REGISTRIES.get(registry, default_host)[2]
-         img_name = ref.split(":")[0] if ":" in ref else ref
-         img_name = img_name if "/" in img_name else f"library/{img_name}"
-         if auth_host is not None and token is None:
-             token = self.fetch_token(auth_host, service, img_name)
+     def fetch_index(
+         self, api_host: str, local_image_info: DockerImageInfo, token: str | None
+     ) -> tuple[Any | None, str | None, CacheMetadata | None]:
+         if local_image_info.tag:
+             api_url: str = f"https://{api_host}/v2/{local_image_info.name}/manifests/{local_image_info.tag}"
+             cache_ttl: int | None = self.cfg.mutable_cache_ttl
+         else:
+             api_url = f"https://{api_host}/v2/{local_image_info.name}/manifests/{local_image_info.pinned_digest}"
+             cache_ttl = self.cfg.immutable_cache_ttl

-         img_tag = ref.split(":")[1] if ":" in ref else "latest"
-         img_tag = img_tag.split("@")[0] if "@" in img_tag else img_tag
          response: Response | None = fetch_url(
-             f"https://{api_host}/v2/{img_name}/manifests/{img_tag}",
-             cache_ttl=mutable_cache_ttl,
+             api_url,
+             cache_ttl=cache_ttl,
              bearer_token=token,
-             response_type="application/vnd.oci.image.index.v1+json",
+             response_type=[
+                 "application/vnd.oci.image.index.v1+json",
+                 "application/vnd.docker.distribution.manifest.list.v2+json",
+             ],
+             api_stats_counter=self.api_stats,
          )
+
          if response is None:
-             logger.debug("Empty response for manifest for image")
-             return annotations
-         if not response.is_success:
+             self.log.warning("Empty response for manifest for image at %s", api_url)
+         elif response.status_code == 429:
+             self.throttler.throttle(local_image_info.index_name, raise_exception=True)
+         elif not response.is_success:
              api_data = httpx_json_content(response, {})
-             logger.warning(
-                 "Failed to fetch manifest: %s",
+             self.log.warning(
+                 "Failed to fetch index from %s: %s",
+                 api_url,
                  api_data.get("errors") if api_data else response.text,
              )
-             return annotations
-         index = response.json()
-         logger.debug(
-             "INDEX %s manifests, %s annotations",
-             len(index.get("manifests", [])),
-             len(index.get("annotations", [])),
+         else:
+             index = response.json()
+             self.log.debug(
+                 "INDEX %s manifests, %s annotations, api: %s, header digest: %s",
+                 len(index.get("manifests", [])),
+                 len(index.get("annotations", [])),
+                 response.headers.get(HEADER_DOCKER_API, "N/A"),
+                 response.headers.get(HEADER_DOCKER_DIGEST, "N/A"),
+             )
+             return index, response.headers.get(HEADER_DOCKER_DIGEST), CacheMetadata(response)
+         return None, None, None
+
+     def fetch_object(
+         self,
+         api_host: str,
+         local_image_info: DockerImageInfo,
+         media_type: str,
+         digest: str,
+         token: str | None,
+         follow_redirects: bool = False,
+         api_type: str = "manifests",
+     ) -> tuple[Any | None, CacheMetadata | None]:
+         api_url = f"https://{api_host}/v2/{local_image_info.name}/{api_type}/{digest}"
+         response = fetch_url(
+             api_url,
+             cache_ttl=self.cfg.immutable_cache_ttl,
+             bearer_token=token,
+             response_type=media_type,
+             allow_stale=True,
+             follow_redirects=follow_redirects,
+             api_stats_counter=self.api_stats,
          )
-         annotations = index.get("annotations", {})
-         for m in index.get("manifests", []):
-             platform_info = m.get("platform", {})
-             if platform_info.get("os") == os and platform_info.get("architecture") == arch:
-                 digest = m.get("digest")
-                 media_type = m.get("mediaType")
-                 response = fetch_url(
-                     f"https://{api_host}/v2/{img_name}/manifests/{digest}",
-                     cache_ttl=immutable_cache_ttl,
-                     bearer_token=token,
-                     response_type=media_type,
+
+         if response and response.is_success:
+             obj = httpx_json_content(response, None)
+             if obj:
+                 self.log.debug(
+                     "%s, header digest:%s, api: %s, %s annotations",
+                     api_type.upper(),
+                     response.headers.get(HEADER_DOCKER_DIGEST, "N/A"),
+                     response.headers.get(HEADER_DOCKER_API, "N/A"),
+                     len(obj.get("annotations", [])),
+                 )
+             return obj, CacheMetadata(response)
+         elif response and response.status_code == 429:
+             self.throttler.throttle(local_image_info.index_name, raise_exception=True)
+         elif response and not response.is_success:
+             api_data = httpx_json_content(response, {})
+             if response:
+                 self.log.warning(
+                     "Failed to fetch obj from %s: %s %s",
+                     api_url,
+                     response.status_code,
+                     api_data.get("errors") if api_data else response.text,
+                 )
+             else:
+                 self.log.warning(
+                     "Failed to fetch obj from %s: No Response, %s", api_url, api_data.get("errors") if api_data else None
                  )
-                 if response and response.is_success:
-                     api_data = httpx_json_content(response, None)
-                     if api_data:
-                         logger.debug(
-                             "MANIFEST %s layers, %s annotations",
-                             len(api_data.get("layers", [])),
-                             len(api_data.get("annotations", [])),
-                         )
-                         if api_data.get("annotations"):
-                             annotations.update(api_data.get("annotations", {}))
-                     else:
-                         logger.debug("No annotations found in manifest: %s", api_data)

-         if not annotations:
-             logger.debug("No annotations found from registry data")
-         return annotations
+         else:
+             self.log.error("Empty response from %s", api_url)
+         return None, None

+     def lookup(
+         self,
+         local_image_info: DockerImageInfo,
+         token: str | None = None,
+         minimal: bool = False,
+         **kwargs, # noqa: ANN003, ARG002
+     ) -> DockerImageInfo:
+         result: DockerImageInfo = DockerImageInfo(local_image_info.ref)
+         if not local_image_info.name or not local_image_info.index_name:
+             self.log.debug("No local pkg name or registry index name to check")
+             return result
+
+         if self.throttler.check_throttle(local_image_info.index_name):
+             result.throttled = True
+             return result
+
+         if token:
+             self.log.debug("Using provided token to fetch manifest for image %s", local_image_info.ref)
+         else:
+             try:
+                 token = self.fetch_token(local_image_info.index_name, local_image_info.name)
+             except AuthError as e:
+                 self.log.warning("Authentication error prevented Docker Registry enrichment: %s", e)
+                 result.error = str(e)
+                 return result
+
+         index: Any | None = None
+         index_digest: str | None = None # fetched from header, should be the image digest
+         index_cache_metadata: CacheMetadata | None = None
+         manifest_cache_metadata: CacheMetadata | None = None
+         config_cache_metadata: CacheMetadata | None = None
+         api_host: str | None = REGISTRIES.get(
+             local_image_info.index_name, (local_image_info.index_name, local_image_info.index_name)
+         )[1]
+         if api_host is None:
+             self.log("No API host can be determined for %s", local_image_info.index_name)
+             return result
+         try:
+             index, index_digest, index_cache_metadata = self.fetch_index(api_host, local_image_info, token)
+         except ThrottledError:
+             result.throttled = True
+             index = None
+
+         if index:
+             result.annotations = index.get("annotations", {})
+             for m in index.get("manifests", []):
+                 platform_info = m.get("platform", {})
+                 if (
+                     platform_info.get("os") == local_image_info.os
+                     and platform_info.get("architecture") == local_image_info.arch
+                     and ("Variant" not in platform_info or platform_info.get("Variant") == local_image_info.variant)
+                 ):
+                     if index_digest:
+                         result.image_digest = index_digest
+                         result.short_digest = result.condense_digest(index_digest)
+                         log.debug("Setting %s image digest %s", result.name, result.short_digest)
+
+                     digest: str | None = m.get("digest")
+                     media_type = m.get("mediaType")
+                     manifest: Any | None = None
+
+                     if digest:
+                         try:
+                             manifest, manifest_cache_metadata = self.fetch_object(
+                                 api_host, local_image_info, media_type, digest, token
+                             )
+                         except ThrottledError:
+                             result.throttled = True
+
+                     if manifest:
+                         digest = manifest.get("config", {}).get("digest")
+                         if digest is None:
+                             self.log.warning("Empty digest for %s %s %s", api_host, digest, media_type)
+                         else:
+                             result.repo_digest = result.condense_digest(digest, short=False)
+                             log.debug("Setting %s repo digest: %s", result.name, result.repo_digest)

- r"""
- https://ghcr.io/token\?scope\="repository:rhizomatics/updates2mqtt:pull"
- https://ghcr.io/v2/rhizomatics/updates2mqtt/manifests/sha256:2c8edc1f9400ef02a93c3b754d4419082ceb5d049178c3a3968e3fd56caf7f29 Accept:application/vnd.oci.image.index.v1+json Accept:application/vnd.oci.image.manifest.v1+json Accept:application/vnd.docker.distribution.manifest.v2+json
- https://ghcr.io/v2/rhizomatics/updates2mqtt/manifests/latest Accept:application/vnd.oci.image.index.v1+json Accept:application/vnd.oci.image.manifest.v1+json Accept:appli
- """ # noqa: E501
+                         if manifest.get("annotations"):
+                             result.annotations.update(manifest.get("annotations", {}))
+                         else:
+                             self.log.debug("No annotations found in manifest: %s", manifest)
+
+                         if not minimal and manifest.get("config"):
+                             try:
+                                 img_config, config_cache_metadata = self.fetch_object(
+                                     api_host=api_host,
+                                     local_image_info=local_image_info,
+                                     media_type=manifest["config"].get("mediaType"),
+                                     digest=manifest["config"].get("digest"),
+                                     token=token,
+                                     follow_redirects=True,
+                                     api_type="blobs",
+                                 )
+                                 if img_config:
+                                     config = img_config.get("config") or img_config.get("Config")
+                                     if config and "Labels" in config:
+                                         result.annotations.update(config.get("Labels") or {})
+                                     result.annotations.update(img_config.get("annotations") or {})
+                                 else:
+                                     self.log.debug("No config found: %s", manifest)
+                             except Exception as e:
+                                 self.log.warning("Failed to extract %s image info from config: %s", local_image_info.ref, e)
+
+         if not result.annotations:
+             self.log.debug("No annotations found from registry data")
+
+         labels: dict[str, str | float | int | bool | None] = cherrypick_annotations(local_image_info, result)
+         result.custom = labels or {}
+         if index_cache_metadata:
+             result.custom["index_cache_age"] = index_cache_metadata.age
+         if manifest_cache_metadata:
+             result.custom["manifest_cache_age"] = manifest_cache_metadata.age
+         if config_cache_metadata:
+             result.custom["config_cache_age"] = config_cache_metadata.age
+         result.version = cast("str|None", labels.get("image_version"))
+         result.origin = "OCI_V2" if not minimal else "OCI_V2_MINIMAL"
+
+         self.log.debug(
+             "OCI_V2 Lookup for %s: short_digest:%s, repo_digest:%s, version: %s",
+             local_image_info.name,
+             result.short_digest,
+             result.repo_digest,
+             result.version,
+         )
+         return result
+
+
+ class DockerClientVersionLookup(VersionLookup):
+     """Query remote registry via local Docker API
+
+     No auth needed, however uses the old v1 APIs, and only Index available via API
+     """
+
+     def __init__(self, client: docker.DockerClient, throttler: Throttler, cfg: RegistryConfig, api_backoff: int = 30) -> None:
+         self.client: docker.DockerClient = client
+         self.throttler: Throttler = throttler
+         self.cfg: RegistryConfig = cfg
+         self.api_backoff: int = api_backoff
+         self.log: Any = structlog.get_logger().bind(integration="docker", tool="version_lookup")
+
+     def lookup(self, local_image_info: DockerImageInfo, retries: int = 3, **kwargs) -> DockerImageInfo: # noqa: ANN003, ARG002
+         retries_left = retries
+         retry_secs: int = self.api_backoff
+         reg_data: RegistryData | None = None
+
+         result = DockerImageInfo(local_image_info.ref)
+         if local_image_info.index_name is None or local_image_info.ref is None:
+             return result
+
+         while reg_data is None and retries_left > 0:
+             if self.throttler.check_throttle(local_image_info.index_name):
+                 result.throttled = True
+                 break
+             try:
+                 self.log.debug("Fetching registry data", image_ref=local_image_info.ref)
+                 reg_data = self.client.images.get_registry_data(local_image_info.ref)
+                 self.log.debug(
+                     "Registry Data: id:%s,image:%s, attrs:%s",
+                     reg_data.id,
+                     reg_data.image_name,
+                     reg_data.attrs,
+                 )
+                 if reg_data:
+                     result.short_digest = result.condense_digest(reg_data.short_id)
+                     result.image_digest = result.condense_digest(reg_data.id, short=False)
+                     # result.name = reg_data.image_name
+                     result.attributes = reg_data.attrs
+                     result.annotations = reg_data.attrs.get("Config", {}).get("Labels") or {}
+                     result.error = None
+
+             except docker.errors.APIError as e:
+                 if e.status_code == HTTPStatus.TOO_MANY_REQUESTS:
+                     retry_secs = round(retry_secs**1.5)
+                     try:
+                         retry_secs = int(e.response.headers.get("Retry-After", -1)) # type: ignore[union-attr]
+                     except Exception as e2:
+                         self.log.debug("Failed to access headers for retry info: %s", e2)
+                     self.throttler.throttle(local_image_info.index_name, retry_secs, e.explanation)
+                     result.throttled = True
+                     return result
+                 result.error = str(e)
+                 retries_left -= 1
+                 if retries_left == 0 or e.is_client_error():
+                     self.log.warn("Failed to fetch registry data: [%s] %s", e.errno, e.explanation)
+                 else:
+                     self.log.debug("Failed to fetch registry data, retrying: %s", e)
+
+         labels: dict[str, str | float | int | bool | None] = cherrypick_annotations(local_image_info, result)
+         result.custom = labels or {}
+         result.version = cast("str|None", labels.get("image_version"))
+         result.origin = "DOCKER_CLIENT"
+         return result