updates2mqtt 1.6.0__py3-none-any.whl → 1.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
updates2mqtt/app.py CHANGED
@@ -14,7 +14,7 @@ import structlog
14
14
  import updates2mqtt
15
15
  from updates2mqtt.model import Discovery, ReleaseProvider
16
16
 
17
- from .config import Config, PackageUpdateInfo, load_app_config, load_package_info
17
+ from .config import Config, PublishPolicy, UpdatePolicy, load_app_config
18
18
  from .integrations.docker import DockerProvider
19
19
  from .mqtt import MqttPublisher
20
20
 
@@ -49,7 +49,6 @@ class App:
49
49
 
50
50
  structlog.configure(wrapper_class=structlog.make_filtering_bound_logger(getattr(logging, str(self.cfg.log.level))))
51
51
  log.debug("Logging initialized", level=self.cfg.log.level)
52
- self.common_pkg: dict[str, PackageUpdateInfo] = load_package_info(PKG_INFO_FILE)
53
52
 
54
53
  self.publisher = MqttPublisher(self.cfg.mqtt, self.cfg.node, self.cfg.homeassistant)
55
54
 
@@ -57,7 +56,7 @@ class App:
57
56
  self.scan_count: int = 0
58
57
  self.last_scan: str | None = None
59
58
  if self.cfg.docker.enabled:
60
- self.scanners.append(DockerProvider(self.cfg.docker, self.common_pkg, self.cfg.node, self.self_bounce))
59
+ self.scanners.append(DockerProvider(self.cfg.docker, self.cfg.node, self.self_bounce))
61
60
  self.stopped = Event()
62
61
  self.healthcheck_topic = self.cfg.node.healthcheck.topic_template.format(node_name=self.cfg.node.name)
63
62
 
@@ -104,6 +103,7 @@ class App:
104
103
  )
105
104
 
106
105
  for scanner in self.scanners:
106
+ scanner.initialize()
107
107
  self.publisher.subscribe_hass_command(scanner)
108
108
 
109
109
  while not self.stopped.is_set() and self.publisher.is_available():
@@ -122,12 +122,15 @@ class App:
122
122
  async def on_discovery(self, discovery: Discovery) -> None:
123
123
  dlog = log.bind(name=discovery.name)
124
124
  try:
125
- if self.cfg.homeassistant.discovery.enabled:
125
+ if discovery.publish_policy == PublishPolicy.HOMEASSISTANT and self.cfg.homeassistant.discovery.enabled:
126
+ # Only publish the Home Assistant discovery config when the policy targets Home Assistant and HA discovery is enabled
126
127
  self.publisher.publish_hass_config(discovery)
127
-
128
- self.publisher.publish_hass_state(discovery)
128
+ if discovery.publish_policy in (PublishPolicy.HOMEASSISTANT):
129
+ self.publisher.publish_hass_state(discovery)
130
+ if discovery.publish_policy in (PublishPolicy.HOMEASSISTANT, PublishPolicy.MQTT):
131
+ self.publisher.publish_discovery(discovery)
129
132
  if (
130
- discovery.update_policy == "Auto"
133
+ discovery.update_policy == UpdatePolicy.AUTO
131
134
  and discovery.can_update
132
135
  and discovery.latest_version != discovery.current_version
133
136
  ):
updates2mqtt/cli.py ADDED
@@ -0,0 +1,150 @@
1
+ from typing import TYPE_CHECKING
2
+
3
+ import structlog
4
+ from omegaconf import DictConfig, OmegaConf
5
+ from rich import print_json
6
+
7
+ from updates2mqtt.config import DockerConfig, NodeConfig, RegistryConfig
8
+ from updates2mqtt.helpers import Throttler
9
+ from updates2mqtt.integrations.docker import DockerProvider
10
+ from updates2mqtt.integrations.docker_enrich import (
11
+ REGISTRIES,
12
+ ContainerDistributionAPIVersionLookup,
13
+ DockerImageInfo,
14
+ fetch_url,
15
+ )
16
+ from updates2mqtt.model import Discovery
17
+
18
+ if TYPE_CHECKING:
19
+ from httpx import Response
20
+
21
+ log = structlog.get_logger()
22
+
23
+
24
+ """
25
+ Super simple CLI
26
+
27
+ python -m updates2mqtt.cli container=frigate
28
+
29
+ python -m updates2mqtt.cli container=frigate api=docker_client log_level=DEBUG
30
+
31
+ python3 updates2mqtt/cli.py blob=ghcr.io/homarr-labs/homarr@sha256:af79a3339de5ed8ef7f5a0186ff3deb86f40b213ba75249291f2f68aef082a25 | jq '.config.Labels'
32
+
33
+ python3 updates2mqtt/cli.py manifest=ghcr.io/blakeblackshear/frigate:stable
34
+
35
+ python3 updates2mqtt/cli.py blob=ghcr.io/blakeblackshear/frigate@sha256:ef8d56a7d50b545af176e950ce328aec7f0b7bc5baebdca189fe661d97924980
36
+
37
+ python3 updates2mqtt/cli.py manifest=ghcr.io/blakeblackshear/frigate@sha256:c68fd78fd3237c9ba81b5aa927f17b54f46705990f43b4b5d5596cfbbb626af4
38
+ """ # noqa: E501
39
+
40
+ OCI_MANIFEST_TYPES: list[str] = [
41
+ "application/vnd.oci.image.manifest.v1+json",
42
+ "application/vnd.oci.image.index.v1+json",
43
+ "application/vnd.oci.descriptor.v1+json",
44
+ "application/vnd.oci.empty.v1+json",
45
+ ]
46
+
47
+ OCI_CONFIG_TYPES: list[str] = [
48
+ "application/vnd.oci.image.config.v1+json",
49
+ ]
50
+
51
+ OCI_LAYER_TYPES: list[str] = [
52
+ "application/vnd.oci.image.layer.v1.tar",
53
+ "application/vnd.oci.image.layer.v1.tar+gzip",
54
+ "application/vnd.oci.image.layer.v1.tar+zstd",
55
+ ]
56
+
57
+ OCI_NONDISTRIBUTABLE_LAYER_TYPES: list[str] = [
58
+ "application/vnd.oci.image.layer.nondistributable.v1.tar",
59
+ "application/vnd.oci.image.layer.nondistributable.v1.tar+gzip",
60
+ "application/vnd.oci.image.layer.nondistributable.v1.tar+zstd",
61
+ ]
62
+
63
+ # Docker Compatibility MIME Types
64
+ DOCKER_MANIFEST_TYPES: list[str] = [
65
+ "application/vnd.docker.distribution.manifest.v2+json",
66
+ "application/vnd.docker.distribution.manifest.list.v2+json",
67
+ "application/vnd.docker.distribution.manifest.v1+json",
68
+ "application/vnd.docker.distribution.manifest.v1+prettyjws",
69
+ ]
70
+
71
+ DOCKER_CONFIG_TYPES: list[str] = [
72
+ "application/vnd.docker.container.image.v1+json",
73
+ ]
74
+
75
+ DOCKER_LAYER_TYPES: list[str] = [
76
+ "application/vnd.docker.image.rootfs.diff.tar.gzip",
77
+ "application/vnd.docker.image.rootfs.foreign.diff.tar.gzip",
78
+ ]
79
+
80
+ # Combined constants
81
+ ALL_MANIFEST_TYPES: list[str] = OCI_MANIFEST_TYPES + DOCKER_MANIFEST_TYPES
82
+ ALL_CONFIG_TYPES: list[str] = OCI_CONFIG_TYPES + DOCKER_CONFIG_TYPES
83
+ ALL_LAYER_TYPES: list[str] = OCI_LAYER_TYPES + OCI_NONDISTRIBUTABLE_LAYER_TYPES + DOCKER_LAYER_TYPES
84
+
85
+ # All content types that might be returned by the API
86
+ ALL_OCI_MEDIA_TYPES: list[str] = (
87
+ ALL_MANIFEST_TYPES
88
+ + ALL_CONFIG_TYPES
89
+ + ALL_LAYER_TYPES
90
+ + ["application/octet-stream", "application/json"] # Error responses
91
+ )
92
+
93
+
94
def dump_url(doc_type: str, img_ref: str) -> None:
    """Fetch one registry document for an image reference and print it as JSON.

    Args:
        doc_type: ``"blob"`` to fetch the blob addressed by the reference's pinned digest,
            or ``"manifest"`` to fetch the manifest for its tag or digest. Any other value
            is reported and ignored.
        img_ref: Image reference to parse, e.g. ``ghcr.io/owner/image:tag`` or
            ``ghcr.io/owner/image@sha256:...``.

    Nothing is returned. The response body goes to stdout via ``print_json``; the status
    code and headers are only logged at debug level. Unparseable references, missing
    digests/tags and failed fetches are logged and otherwise ignored.
    """
    lookup = ContainerDistributionAPIVersionLookup(Throttler(), RegistryConfig())
    img_info = DockerImageInfo(img_ref)
    if not img_info.index_name or not img_info.name:
        log.error("Unable to parse %s", img_ref)
        return

    # Element [1] of a REGISTRIES entry is used as the API host; registries that are not
    # listed fall back to the index name itself.
    api_host: str | None = REGISTRIES.get(img_info.index_name, (img_info.index_name, img_info.index_name))[1]

    if doc_type == "blob":
        if not img_info.pinned_digest:
            log.warning("No digest found in %s", img_ref)
            return
        url: str = f"https://{api_host}/v2/{img_info.name}/blobs/{img_info.pinned_digest}"
    elif doc_type == "manifest":
        if not img_info.tag_or_digest:
            log.warning("No tag or digest found in %s", img_ref)
            return
        url = f"https://{api_host}/v2/{img_info.name}/manifests/{img_info.tag_or_digest}"
    else:
        log.warning("Unsupported document type %s", doc_type)
        return

    token: str | None = lookup.fetch_token(img_info.index_name, img_info.name)

    response: Response | None = fetch_url(url, bearer_token=token, follow_redirects=True, response_type=ALL_OCI_MEDIA_TYPES)
    if response:
        log.debug(f"{response.status_code}: {url}")
        log.debug("HEADERS")
        for k, v in response.headers.items():
            log.debug(f"{k}: {v}")
        log.debug("CONTENTS")
        print_json(response.text)
126
+
127
+
128
+ def main() -> None:
129
+ # will be a proper cli someday
130
+ cli_conf: DictConfig = OmegaConf.from_cli()
131
+ structlog.configure(wrapper_class=structlog.make_filtering_bound_logger(cli_conf.get("log_level", "WARNING")))
132
+
133
+ if cli_conf.get("blob"):
134
+ dump_url("blob", cli_conf.get("blob"))
135
+ elif cli_conf.get("manifest"):
136
+ dump_url("manifest", cli_conf.get("manifest"))
137
+
138
+ else:
139
+ docker_scanner = DockerProvider(
140
+ DockerConfig(registry=RegistryConfig(api=cli_conf.get("api", "OCI_V2"))), NodeConfig(), None
141
+ )
142
+ discovery: Discovery | None = docker_scanner.rescan(
143
+ Discovery(docker_scanner, cli_conf.get("container", "frigate"), "cli", "manual")
144
+ )
145
+ if discovery:
146
+ log.info(discovery.as_dict())
147
+
148
+
149
+ if __name__ == "__main__":
150
+ main()
updates2mqtt/config.py CHANGED
@@ -9,6 +9,23 @@ from omegaconf import MISSING, DictConfig, MissingMandatoryValue, OmegaConf, Val
9
9
 
10
10
  log = structlog.get_logger()
11
11
 
12
+ PKG_INFO_FILE = Path("./common_packages.yaml")
13
+ UNKNOWN_VERSION = "UNKNOWN"
14
+ VERSION_RE = r"[vVr]?[0-9]+(\.[0-9]+)*"
15
+ # source: https://semver.org/#is-there-a-suggested-regular-expression-regex-to-check-a-semver-string
16
+ SEMVER_RE = r"^(?P<major>0|[1-9]\d*)\.(?P<minor>0|[1-9]\d*)\.(?P<patch>0|[1-9]\d*)(?:-(?P<prerelease>(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+(?P<buildmetadata>[0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$" # noqa: E501
17
+
18
+
19
+ class UpdatePolicy(StrEnum):
20
+ AUTO = "Auto"
21
+ PASSIVE = "Passive"
22
+
23
+
24
+ class PublishPolicy(StrEnum):
25
+ HOMEASSISTANT = "HomeAssistant"
26
+ MQTT = "MQTT"
27
+ SILENT = "Silent"
28
+
12
29
 
13
30
  class LogLevel(StrEnum):
14
31
  DEBUG = "DEBUG"
@@ -18,6 +35,28 @@ class LogLevel(StrEnum):
18
35
  CRITICAL = "CRITICAL"
19
36
 
20
37
 
38
+ class RegistryAPI(StrEnum):
39
+ OCI_V2 = "OCI_V2"
40
+ OCI_V2_MINIMAL = "OCI_V2"
41
+ DOCKER_CLIENT = "DOCKER_CLIENT"
42
+ DISABLED = "DISABLED"
43
+
44
+
45
+ class VersionType:
46
+ SHORT_SHA = "short_sha"
47
+ FULL_SHA = "full_sha"
48
+ VERSION_REVISION = "version_revision"
49
+ VERSION = "version"
50
+
51
+
52
+ @dataclass
53
+ class RegistryConfig:
54
+ api: RegistryAPI = RegistryAPI.OCI_V2
55
+ mutable_cache_ttl: int | None = None # default to server cache hint
56
+ immutable_cache_ttl: int | None = 7776000 # 90 days
57
+ token_cache_ttl: int | None = None # default to server cache hint
58
+
59
+
21
60
  @dataclass
22
61
  class MqttConfig:
23
62
  host: str = "${oc.env:MQTT_HOST,localhost}"
@@ -34,6 +73,19 @@ class MetadataSourceConfig:
34
73
  cache_ttl: int = 60 * 60 * 24 * 7 # 1 week
35
74
 
36
75
 
76
+ @dataclass
77
+ class Selector:
78
+ include: list[str] | None = None
79
+ exclude: list[str] | None = None
80
+
81
+
82
+ class VersionPolicy(StrEnum):
83
+ AUTO = "AUTO"
84
+ VERSION = "VERSION"
85
+ DIGEST = "DIGEST"
86
+ VERSION_DIGEST = "VERSION_DIGEST"
87
+
88
+
37
89
  @dataclass
38
90
  class DockerConfig:
39
91
  enabled: bool = True
@@ -47,7 +99,12 @@ class DockerConfig:
47
99
  discover_metadata: dict[str, MetadataSourceConfig] = field(
48
100
  default_factory=lambda: {"linuxserver.io": MetadataSourceConfig(enabled=True)}
49
101
  )
50
- api_throttle_wait: int = 60 * 15
102
+ registry: RegistryConfig = field(default_factory=lambda: RegistryConfig())
103
+ default_api_backoff: int = 60 * 15
104
+ image_ref_select: Selector = field(default_factory=lambda: Selector())
105
+ version_select: Selector = field(default_factory=lambda: Selector())
106
+ version_policy: VersionPolicy = VersionPolicy.AUTO
107
+ registry_select: Selector = field(default_factory=lambda: Selector())
51
108
 
52
109
 
53
110
  @dataclass
@@ -62,6 +119,7 @@ class HomeAssistantConfig:
62
119
  state_topic_suffix: str = "state"
63
120
  device_creation: bool = True
64
121
  force_command_topic: bool = False
122
+ extra_attributes: bool = True
65
123
  area: str | None = None
66
124
 
67
125
 
@@ -96,7 +154,7 @@ class Config:
96
154
 
97
155
  @dataclass
98
156
  class DockerPackageUpdateInfo:
99
- image_name: str = MISSING
157
+ image_name: str = MISSING # untagged image ref
100
158
 
101
159
 
102
160
  @dataclass
@@ -104,6 +162,7 @@ class PackageUpdateInfo:
104
162
  docker: DockerPackageUpdateInfo | None = field(default_factory=DockerPackageUpdateInfo)
105
163
  logo_url: str | None = None
106
164
  release_notes_url: str | None = None
165
+ source_repo_url: str | None = None
107
166
 
108
167
 
109
168
  @dataclass
@@ -115,24 +174,6 @@ class IncompleteConfigException(BaseException):
115
174
  pass
116
175
 
117
176
 
118
- def load_package_info(pkginfo_file_path: Path) -> dict[str, PackageUpdateInfo]:
119
- if pkginfo_file_path.exists():
120
- log.debug("Loading common package update info", path=pkginfo_file_path)
121
- cfg = OmegaConf.load(pkginfo_file_path)
122
- else:
123
- log.warn("No common package update info found", path=pkginfo_file_path)
124
- cfg = OmegaConf.structured(UpdateInfoConfig)
125
- try:
126
- # omegaconf broken-ness on optional fields and converting to backclasses
127
- pkg_conf: dict[str, PackageUpdateInfo] = {
128
- pkg: PackageUpdateInfo(**pkg_cfg) for pkg, pkg_cfg in cfg.common_packages.items()
129
- }
130
- return pkg_conf
131
- except (MissingMandatoryValue, ValidationError) as e:
132
- log.error("Configuration error %s", e, path=pkginfo_file_path.as_posix())
133
- raise
134
-
135
-
136
177
  def is_autogen_config() -> bool:
137
178
  env_var: str | None = os.environ.get("U2M_AUTOGEN_CONFIG")
138
179
  return not (env_var and env_var.lower() in ("no", "0", "false"))
@@ -23,28 +23,26 @@ def hass_format_config(
23
23
  object_id: str,
24
24
  state_topic: str,
25
25
  command_topic: str | None,
26
+ attrs_topic: str | None,
26
27
  force_command_topic: bool | None,
27
28
  device_creation: bool = True,
28
29
  area: str | None = None,
29
- session: str | None = None,
30
30
  ) -> dict[str, Any]:
31
31
  config: dict[str, Any] = {
32
32
  "name": discovery.title,
33
33
  "device_class": None, # not firmware, so defaults to null
34
34
  "unique_id": object_id,
35
35
  "state_topic": state_topic,
36
- "source_session": session,
37
36
  "supported_features": discovery.features,
38
- "can_update": discovery.can_update,
39
- "can_build": discovery.can_build,
40
- "can_restart": discovery.can_restart,
41
- "update_policy": discovery.update_policy,
37
+ "default_entity_id": f"update.{discovery.node}_{discovery.provider.source_type}_{discovery.name}",
42
38
  "origin": {
43
39
  "name": f"{discovery.node} updates2mqtt",
44
40
  "sw_version": updates2mqtt.version, # pyright: ignore[reportAttributeAccessIssue]
45
41
  "support_url": "https://github.com/rhizomatics/updates2mqtt/issues",
46
42
  },
47
43
  }
44
+ if attrs_topic:
45
+ config["json_attributes_topic"] = attrs_topic
48
46
  if discovery.entity_picture_url:
49
47
  config["entity_picture"] = discovery.entity_picture_url
50
48
  if discovery.device_icon:
@@ -62,28 +60,21 @@ def hass_format_config(
62
60
  config["command_topic"] = command_topic
63
61
  if discovery.can_update:
64
62
  config["payload_install"] = f"{discovery.source_type}|{discovery.name}|install"
65
- config["custom"] = {}
66
- config["custom"][discovery.source_type] = discovery.custom
67
- config.update(discovery.provider.hass_config_format(discovery))
63
+
68
64
  return config
69
65
 
70
66
 
71
67
  def hass_format_state(discovery: Discovery, session: str, in_progress: bool = False) -> dict[str, Any]: # noqa: ARG001
72
- state = {
68
+ state: dict[str, str | dict | list | bool | None] = {
73
69
  "installed_version": discovery.current_version,
74
70
  "latest_version": discovery.latest_version,
75
71
  "title": discovery.title,
76
72
  "in_progress": in_progress,
77
73
  }
78
- if discovery.release_summary:
79
- state["release_summary"] = discovery.release_summary
80
- if discovery.release_url:
81
- state["release_url"] = discovery.release_url
82
- custom_state = discovery.provider.hass_state_format(discovery)
83
- if custom_state:
84
- state.update(custom_state)
85
- invalid_keys = [k for k in state if k not in HASS_UPDATE_SCHEMA]
86
- if invalid_keys:
87
- log.warning(f"Invalid keys in state: {invalid_keys}")
88
- state = {k: v for k, v in state.items() if k in HASS_UPDATE_SCHEMA}
74
+ if discovery.release_detail:
75
+ if discovery.release_detail.summary:
76
+ state["release_summary"] = discovery.release_detail.summary
77
+ if discovery.release_detail.notes_url:
78
+ state["release_url"] = discovery.release_detail.notes_url
79
+
89
80
  return state
@@ -0,0 +1,226 @@
1
+ import datetime as dt
2
+ import re
3
+ import time
4
+ from threading import Event
5
+ from typing import Any
6
+ from urllib.parse import urlparse
7
+
8
+ import structlog
9
+ from hishel import CacheOptions, SpecificationPolicy # pyright: ignore[reportAttributeAccessIssue]
10
+ from hishel.httpx import SyncCacheClient
11
+ from httpx import Response
12
+ from tzlocal import get_localzone
13
+
14
+ from updates2mqtt.config import Selector
15
+
16
+ log = structlog.get_logger()
17
+
18
+
19
+ def timestamp(time_value: float | None) -> str | None:
20
+ if time_value is None:
21
+ return None
22
+ try:
23
+ return dt.datetime.fromtimestamp(time_value, tz=get_localzone()).isoformat()
24
+ except: # noqa: E722
25
+ return None
26
+
27
+
28
+ class Selection:
29
+ def __init__(self, selector: Selector, value: str | None) -> None:
30
+ self.result: bool = True
31
+ self.matched: str | None = None
32
+ if value is None:
33
+ self.result = selector.include is None
34
+ return
35
+ if selector.exclude is not None:
36
+ self.result = True
37
+ if any(re.search(pat, value) for pat in selector.exclude):
38
+ self.matched = value
39
+ self.result = False
40
+ if selector.include is not None:
41
+ self.result = False
42
+ if any(re.search(pat, value) for pat in selector.include):
43
+ self.matched = value
44
+ self.result = True
45
+
46
+ def __bool__(self) -> bool:
47
+ """Expose the actual boolean so objects can be appropriately truthy"""
48
+ return self.result
49
+
50
+
51
+ class ThrottledError(Exception):
52
+ def __init__(self, message: str, retry_secs: int) -> None:
53
+ super().__init__(message)
54
+ self.retry_secs = retry_secs
55
+
56
+
57
+ class Throttler:
58
+ DEFAULT_SITE = "DEFAULT_SITE"
59
+
60
+ def __init__(self, api_throttle_pause: int = 30, logger: Any | None = None, semaphore: Event | None = None) -> None:
61
+ self.log: Any = logger or log
62
+ self.pause_api_until: dict[str, float] = {}
63
+ self.api_throttle_pause: int = api_throttle_pause
64
+ self.semaphore = semaphore
65
+
66
+ def check_throttle(self, index_name: str | None = None) -> bool:
67
+ if self.semaphore and self.semaphore.is_set():
68
+ return True
69
+ index_name = index_name or self.DEFAULT_SITE
70
+ if self.pause_api_until.get(index_name) is not None:
71
+ if self.pause_api_until[index_name] < time.time():
72
+ del self.pause_api_until[index_name]
73
+ self.log.info("%s throttling wait complete", index_name)
74
+ else:
75
+ self.log.debug("%s throttling has %0.3f secs left", index_name, self.pause_api_until[index_name] - time.time())
76
+ return True
77
+ return False
78
+
79
+ def throttle(
80
+ self,
81
+ index_name: str | None = None,
82
+ retry_secs: int | None = None,
83
+ explanation: str | None = None,
84
+ raise_exception: bool = False,
85
+ ) -> None:
86
+ index_name = index_name or self.DEFAULT_SITE
87
+ retry_secs = retry_secs if retry_secs and retry_secs > 0 else self.api_throttle_pause
88
+ self.log.warn("%s throttling requests for %s seconds, %s", index_name, retry_secs, explanation)
89
+ self.pause_api_until[index_name] = time.time() + retry_secs
90
+ if raise_exception:
91
+ raise ThrottledError(explanation or f"{index_name} throttled request", retry_secs)
92
+
93
+
94
+ class CacheMetadata:
95
+ """Cache metadata extracted from hishel response extensions"""
96
+
97
+ def __init__(self, response: Response) -> None:
98
+ self.from_cache: bool = response.extensions.get("hishel_from_cache", False)
99
+ self.revalidated: bool = response.extensions.get("hishel_revalidated", False)
100
+ self.created_at: float | None = response.extensions.get("hishel_created_at")
101
+ self.stored: bool = response.extensions.get("hishel_stored", False)
102
+ self.age: float | None = None
103
+ if self.created_at is not None:
104
+ self.age = time.time() - self.created_at
105
+
106
+ def __str__(self) -> str:
107
+ """Summarize in a string"""
108
+ return f"cached: {self.from_cache}, revalidated: {self.revalidated}, age:{self.age}, stored:{self.stored}"
109
+
110
+
111
class APIStats:
    """Running fetch counters; ``APIStatsCounter`` keeps one instance per host."""

    def __init__(self) -> None:
        self.fetches: int = 0
        self.cached: int = 0
        self.revalidated: int = 0
        # HTTP status code -> number of failures; key 0 counts fetches with no response
        self.failed: dict[int, int] = {}
        self.elapsed: float = 0
        # Stays None until a response carrying a cache age has been seen
        self.max_cache_age: float | None = None

    def tick(self, response: "Response | None") -> None:
        """Record one fetch attempt.

        Args:
            response: The response received, or ``None`` when the fetch produced none.
        """
        self.fetches += 1
        if response is None:
            self.failed[0] = self.failed.get(0, 0) + 1
            return
        cache_metadata = CacheMetadata(response)
        if cache_metadata.from_cache:
            self.cached += 1
        if cache_metadata.revalidated:
            self.revalidated += 1
        if response.elapsed:
            # total_seconds() covers days, seconds and microseconds in one value
            self.elapsed += response.elapsed.total_seconds()
        if not response.is_success:
            self.failed[response.status_code] = self.failed.get(response.status_code, 0) + 1
        if cache_metadata.age is not None and (self.max_cache_age is None or cache_metadata.age > self.max_cache_age):
            self.max_cache_age = cache_metadata.age

    def hit_ratio(self) -> float:
        """Return the fraction of fetches served from cache, rounded to 2 places (0 if none)."""
        return round(self.cached / self.fetches, 2) if self.cached and self.fetches else 0

    def average_elapsed(self) -> float:
        """Return the mean elapsed seconds per fetch, rounded to 2 places (0 if none)."""
        return round(self.elapsed / self.fetches, 2) if self.elapsed and self.fetches else 0

    def __str__(self) -> str:
        """Log line friendly string summary"""
        errors: str = ", ".join(f"{status_code}:{fails}" for status_code, fails in self.failed.items())
        # max_cache_age is None until a cached response is seen; formatting None with
        # ':.2f' would raise TypeError, so substitute a placeholder.
        oldest: str = "n/a" if self.max_cache_age is None else f"{self.max_cache_age:.2f}"
        return (
            f"fetches: {self.fetches}, cache ratio: {self.hit_ratio():.2%}, revalidated: {self.revalidated}, "
            f"errors: {errors}, "
            f"oldest cache hit: {oldest}, avg elapsed: {self.average_elapsed()}"
        )
151
+
152
+
153
+ class APIStatsCounter:
154
+ def __init__(self) -> None:
155
+ self.stats_report_interval: int = 100
156
+ self.host_stats: dict[str, APIStats] = {}
157
+ self.fetches: int = 0
158
+ self.log: Any = structlog.get_logger().bind()
159
+
160
+ def stats(self, url: str, response: Response | None) -> None:
161
+ try:
162
+ host: str = urlparse(url).hostname or "UNKNOWN"
163
+ api_stats: APIStats = self.host_stats.setdefault(host, APIStats())
164
+ api_stats.tick(response)
165
+ self.fetches += 1
166
+ if self.fetches % self.stats_report_interval == 0:
167
+ self.log.info(
168
+ "OCI_V2 API Stats Summary\n%s", "\n".join(f"{host} {stats}" for host, stats in self.host_stats.items())
169
+ )
170
+ except Exception as e:
171
+ self.log.warning("Failed to tick stats: %s", e)
172
+
173
+
174
def fetch_url(
    url: str,
    cache_ttl: int | None = None,  # default to server responses for cache ttl
    bearer_token: str | None = None,
    response_type: str | list[str] | None = None,
    follow_redirects: bool = False,
    allow_stale: bool = False,
    method: str = "GET",
    api_stats_counter: APIStatsCounter | None = None,
) -> Response | None:
    """Fetch a URL through a caching HTTP client, never raising.

    Args:
        url: The URL to request.
        cache_ttl: Cache lifetime in seconds, sent as a ``cache-control: max-age`` request
            header and as the ``hishel_ttl`` request extension. ``None`` sends no
            ``cache-control`` header.
        bearer_token: If set, sent as an ``Authorization: Bearer`` header.
        response_type: One MIME type, or a list of them, each sent as an ``Accept`` header.
        follow_redirects: Passed to the HTTP client.
        allow_stale: Passed to the cache policy options.
        method: HTTP method to use.
        api_stats_counter: If given, is told about every outcome, including failures.

    Returns:
        The response, whatever its status code, or ``None`` if any exception was raised.
    """
    try:
        headers: list[tuple[str, str]] = []
        if cache_ttl is not None:
            # Only state a max-age when there is one; "max-age=None" is not a valid directive
            headers.append(("cache-control", f"max-age={cache_ttl}"))
        if bearer_token:
            headers.append(("Authorization", f"Bearer {bearer_token}"))
        if response_type:
            response_type = [response_type] if isinstance(response_type, str) else response_type
        if response_type and isinstance(response_type, (tuple, list)):
            headers.extend(("Accept", mime_type) for mime_type in response_type)

        cache_policy = SpecificationPolicy(
            cache_options=CacheOptions(
                shared=False,  # Private browser cache
                allow_stale=allow_stale,
            )
        )
        with SyncCacheClient(headers=headers, follow_redirects=follow_redirects, policy=cache_policy) as client:
            # Keep the bearer token out of the logs
            loggable_headers = [(k, "Bearer ***" if k == "Authorization" else v) for k, v in headers]
            log.debug(f"Fetching URL {url}, redirects={follow_redirects}, headers={loggable_headers}, cache_ttl={cache_ttl}")
            response: Response = client.request(method=method, url=url, extensions={"hishel_ttl": cache_ttl})
            cache_metadata: CacheMetadata = CacheMetadata(response)
            if not response.is_success:
                log.debug("URL %s fetch returned non-success status: %s, %s", url, response.status_code, cache_metadata.stored)
            elif response:
                log.debug(
                    "URL response: status: %s, cached: %s, revalidated: %s, cache age: %s, stored: %s",
                    response.status_code,
                    cache_metadata.from_cache,
                    cache_metadata.revalidated,
                    cache_metadata.age,
                    cache_metadata.stored,
                )
            if api_stats_counter:
                api_stats_counter.stats(url, response)
            return response
    except Exception as e:
        # Deliberate best effort: callers treat None as "could not fetch"
        log.debug("URL %s failed to fetch: %s", url, e)
        if api_stats_counter:
            api_stats_counter.stats(url, None)
    return None
222
+
223
+
224
+ def validate_url(url: str, cache_ttl: int = 300) -> bool:
225
+ response: Response | None = fetch_url(url, method="HEAD", cache_ttl=cache_ttl, follow_redirects=True)
226
+ return response is not None and response.status_code != 404