PyPI - webchanges - Versions diffs - 3.24.0__tar.gz → 3.25.0__tar.gz - Mend

webchanges 3.24.0__tar.gz → 3.25.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

{webchanges-3.24.0/webchanges.egg-info → webchanges-3.25.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: webchanges
-Version: 3.24.0
+Version: 3.25.0
 Summary: Check web (or command output) for changes since last run and notify. Anonymously alerts you of web changes, with
 Author-email: Mike Borsetti <mike+webchanges@borsetti.com>
 Maintainer-email: Mike Borsetti <mike+webchanges@borsetti.com>

{webchanges-3.24.0 → webchanges-3.25.0}/webchanges/__init__.py RENAMED Viewed

@@ -22,7 +22,7 @@ __project_name__ = __package__
 # * MINOR version when you add functionality in a backwards compatible manner, and
 # * MICRO or PATCH version when you make backwards compatible bug fixes. We no longer use '0'
 # If unsure on increments, use pkg_resources.parse_version to parse
-__version__ = '3.24.0'
+__version__ = '3.25.0'
 __description__ = (
     'Check web (or command output) for changes since last run and notify.\n'
     '\n'

webchanges-3.25.0/webchanges/_vendored/headers.py ADDED Viewed

@@ -0,0 +1,319 @@
+"""
+Vendored version of httpx.Headers class from httpx v0.27.0 released on 21-Feb-24
+https://github.com/encode/httpx/releases/tag/0.27.0.
+Allows us to load this class in case httpx isn't installed.
+See https://github.com/psf/requests and https://github.com/encode/httpx/blob/master/httpx/_models.py
+"""
+from __future__ import annotations
+from typing import (
+    Any,
+    AnyStr,
+    ItemsView,
+    Iterable,
+    Iterator,
+    KeysView,
+    List,
+    Mapping,
+    MutableMapping,
+    Sequence,
+    Tuple,
+    Union,
+    ValuesView,
+)
+HeaderTypes = Union[
+    'Headers',
+    Mapping[str, str],
+    Mapping[bytes, bytes],
+    Sequence[Tuple[str, str]],
+    Sequence[Tuple[bytes, bytes]],
+]
+def normalize_header_key(
+    value: str | bytes,
+    lower: bool,
+    encoding: str | None = None,
+) -> bytes:
+    """
+    Coerce str/bytes into a strictly byte-wise HTTP header key.
+    """
+    if isinstance(value, bytes):
+        bytes_value = value
+    else:
+        bytes_value = value.encode(encoding or 'ascii')
+    return bytes_value.lower() if lower else bytes_value
+def normalize_header_value(value: str | bytes, encoding: str | None = None) -> bytes:
+    """
+    Coerce str/bytes into a strictly byte-wise HTTP header value.
+    """
+    if isinstance(value, bytes):
+        return value
+    return value.encode(encoding or 'ascii')
+SENSITIVE_HEADERS = {'authorization', 'proxy-authorization'}
+def obfuscate_sensitive_headers(
+    items: Iterable[tuple[AnyStr, AnyStr]],
+) -> Iterator[tuple[AnyStr, AnyStr]]:
+    for k, v in items:
+        if to_str(k.lower()) in SENSITIVE_HEADERS:
+            v = to_bytes_or_str('[secure]', match_type_of=v)
+        yield k, v
+def to_str(value: str | bytes, encoding: str = 'utf-8') -> str:
+    return value if isinstance(value, str) else value.decode(encoding)
+def to_bytes_or_str(value: str, match_type_of: AnyStr) -> AnyStr:
+    return value if isinstance(match_type_of, str) else value.encode()
+class Headers(MutableMapping[str, str]):
+    """
+    HTTP headers, as a case-insensitive multi-dict.
+    """
+    def __init__(
+        self,
+        headers: HeaderTypes | None = None,
+        encoding: str | None = None,
+    ) -> None:
+        if headers is None:
+            self._list: List[Tuple[bytes, bytes, bytes]] = []
+        elif isinstance(headers, Headers):
+            self._list = list(headers._list)
+        elif isinstance(headers, Mapping):
+            self._list = [
+                (
+                    normalize_header_key(k, lower=False, encoding=encoding),
+                    normalize_header_key(k, lower=True, encoding=encoding),
+                    normalize_header_value(v, encoding),
+                )
+                for k, v in headers.items()
+            ]
+        else:
+            self._list = [
+                (
+                    normalize_header_key(k, lower=False, encoding=encoding),
+                    normalize_header_key(k, lower=True, encoding=encoding),
+                    normalize_header_value(v, encoding),
+                )
+                for k, v in headers
+            ]
+        self._encoding = encoding
+    @property
+    def encoding(self) -> str:
+        """
+        Header encoding is mandated as ascii, but we allow fallbacks to utf-8
+        or iso-8859-1.
+        """
+        if self._encoding is None:
+            for encoding in ['ascii', 'utf-8']:
+                for key, value in self.raw:
+                    try:
+                        key.decode(encoding)
+                        value.decode(encoding)
+                    except UnicodeDecodeError:
+                        break
+                else:
+                    # The else block runs if 'break' did not occur, meaning
+                    # all values fitted the encoding.
+                    self._encoding = encoding
+                    break
+            else:
+                # The ISO-8859-1 encoding covers all 256 code points in a byte,
+                # so will never raise decode errors.
+                self._encoding = 'iso-8859-1'
+        return self._encoding
+    @encoding.setter
+    def encoding(self, value: str) -> None:
+        self._encoding = value
+    @property
+    def raw(self) -> list[tuple[bytes, bytes]]:
+        """
+        Returns a list of the raw header items, as byte pairs.
+        """
+        return [(raw_key, value) for raw_key, _, value in self._list]
+    def keys(self) -> KeysView[str]:
+        return {key.decode(self.encoding): None for _, key, value in self._list}.keys()
+    def values(self) -> ValuesView[str]:
+        values_dict: dict[str, str] = {}
+        for _, key, value in self._list:
+            str_key = key.decode(self.encoding)
+            str_value = value.decode(self.encoding)
+            if str_key in values_dict:
+                values_dict[str_key] += f', {str_value}'
+            else:
+                values_dict[str_key] = str_value
+        return values_dict.values()
+    def items(self) -> ItemsView[str, str]:
+        """
+        Return `(key, value)` items of headers. Concatenate headers
+        into a single comma separated value when a key occurs multiple times.
+        """
+        values_dict: dict[str, str] = {}
+        for _, key, value in self._list:
+            str_key = key.decode(self.encoding)
+            str_value = value.decode(self.encoding)
+            if str_key in values_dict:
+                values_dict[str_key] += f', {str_value}'
+            else:
+                values_dict[str_key] = str_value
+        return values_dict.items()
+    def multi_items(self) -> list[tuple[str, str]]:
+        """
+        Return a list of `(key, value)` pairs of headers. Allow multiple
+        occurrences of the same key without concatenating into a single
+        comma separated value.
+        """
+        return [(key.decode(self.encoding), value.decode(self.encoding)) for _, key, value in self._list]
+    def get(self, key: str, default: Any = None) -> Any:
+        """
+        Return a header value. If multiple occurrences of the header occur
+        then concatenate them together with commas.
+        """
+        try:
+            return self[key]
+        except KeyError:
+            return default
+    def get_list(self, key: str, split_commas: bool = False) -> list[str]:
+        """
+        Return a list of all header values for a given key.
+        If `split_commas=True` is passed, then any comma separated header
+        values are split into multiple return strings.
+        """
+        get_header_key = key.lower().encode(self.encoding)
+        values = [
+            item_value.decode(self.encoding)
+            for _, item_key, item_value in self._list
+            if item_key.lower() == get_header_key
+        ]
+        if not split_commas:
+            return values
+        split_values = []
+        for value in values:
+            split_values.extend([item.strip() for item in value.split(',')])
+        return split_values
+    def update(self, headers: HeaderTypes | None = None) -> None:  # type: ignore[override]
+        headers = Headers(headers)
+        for key in headers.keys():
+            if key in self:
+                self.pop(key)
+        self._list.extend(headers._list)
+    def copy(self) -> Headers:
+        return Headers(self, encoding=self.encoding)
+    def __getitem__(self, key: str) -> str:
+        """
+        Return a single header value.
+        If there are multiple headers with the same key, then we concatenate
+        them with commas. See: https://tools.ietf.org/html/rfc7230#section-3.2.2
+        """
+        normalized_key = key.lower().encode(self.encoding)
+        items = [
+            header_value.decode(self.encoding)
+            for _, header_key, header_value in self._list
+            if header_key == normalized_key
+        ]
+        if items:
+            return ', '.join(items)
+        raise KeyError(key)
+    def __setitem__(self, key: str, value: str) -> None:
+        """
+        Set the header `key` to `value`, removing any duplicate entries.
+        Retains insertion order.
+        """
+        set_key = key.encode(self._encoding or 'utf-8')
+        set_value = value.encode(self._encoding or 'utf-8')
+        lookup_key = set_key.lower()
+        found_indexes = [idx for idx, (_, item_key, _) in enumerate(self._list) if item_key == lookup_key]
+        for idx in reversed(found_indexes[1:]):
+            del self._list[idx]
+        if found_indexes:
+            idx = found_indexes[0]
+            self._list[idx] = (set_key, lookup_key, set_value)
+        else:
+            self._list.append((set_key, lookup_key, set_value))
+    def __delitem__(self, key: str) -> None:
+        """
+        Remove the header `key`.
+        """
+        del_key = key.lower().encode(self.encoding)
+        pop_indexes = [idx for idx, (_, item_key, _) in enumerate(self._list) if item_key.lower() == del_key]
+        if not pop_indexes:
+            raise KeyError(key)
+        for idx in reversed(pop_indexes):
+            del self._list[idx]
+    def __contains__(self, key: Any) -> bool:
+        header_key = key.lower().encode(self.encoding)
+        return header_key in [key for _, key, _ in self._list]
+    def __iter__(self) -> Iterator[Any]:
+        return iter(self.keys())
+    def __len__(self) -> int:
+        return len(self._list)
+    def __eq__(self, other: Any) -> bool:
+        try:
+            other_headers = Headers(other)
+        except ValueError:
+            return False
+        self_list = [(key, value) for _, key, value in self._list]
+        other_list = [(key, value) for _, key, value in other_headers._list]
+        return sorted(self_list) == sorted(other_list)
+    def __repr__(self) -> str:
+        class_name = self.__class__.__name__
+        encoding_str = ''
+        if self.encoding != 'ascii':
+            encoding_str = f', encoding={self.encoding!r}'
+        as_list = list(obfuscate_sensitive_headers(self.multi_items()))
+        as_dict = dict(as_list)
+        no_duplicate_keys = len(as_dict) == len(as_list)
+        if no_duplicate_keys:
+            return f'{class_name}({as_dict!r}{encoding_str})'
+        return f'{class_name}({as_list!r}{encoding_str})'

{webchanges-3.24.0 → webchanges-3.25.0}/webchanges/cli.py RENAMED Viewed

@@ -120,7 +120,7 @@ def teardown_logger(verbose: Optional[int] = None) -> None:
             os.environ.pop('DEBUG', None)
-def locate_jobs_files(filename: Path, default_path: Path, ext: Optional[str] = None) -> list[Path]:
+def _expand_jobs_files(filename: Path, default_path: Path, ext: Optional[str] = None) -> list[Path]:
     """Searches for file both as specified and in the default directory, then retries with 'ext' extension if defined.
     :param filename: The filename.
@@ -134,6 +134,8 @@ def locate_jobs_files(filename: Path, default_path: Path, ext: Optional[str] = N
     # if ext is given, iterate both on raw filename and the filename with ext if different
     if ext and filename.suffix != ext:
         search_filenames.append(filename.with_suffix(ext))
+        # also iterate on file pre-pended with 'jobs-'
+        search_filenames.append(filename.with_stem(f'jobs-{filename.stem}').with_suffix(ext))
     # try as given
     for file in search_filenames:
@@ -152,6 +154,14 @@ def locate_jobs_files(filename: Path, default_path: Path, ext: Optional[str] = N
     return [filename]
+def locate_jobs_files(filenames: list[Path], default_path: Path, ext: Optional[str] = None) -> list[Path]:
+    job_files = set()
+    for filename in filenames:
+        for file in _expand_jobs_files(filename, default_path, ext):
+            job_files.add(file)
+    return list(job_files)
 def locate_storage_file(filename: Path, default_path: Path, ext: Optional[str] = None) -> Path:
     """Searches for file both as specified and in the default directory, then retries with 'ext' extension if defined.
@@ -353,7 +363,7 @@ def main() -> None:  # pragma: no cover
     # Locate config, job and hooks files
     command_config.config_file = locate_storage_file(command_config.config_file, command_config.config_path, '.yaml')
-    command_config.jobs_files = locate_jobs_files(command_config.jobs_def_file, command_config.config_path, '.yaml')
+    command_config.jobs_files = locate_jobs_files(command_config.jobs_files, command_config.config_path, '.yaml')
     command_config.hooks_file = locate_storage_file(command_config.hooks_file, command_config.config_path, '.py')
     # Check for first run

{webchanges-3.24.0 → webchanges-3.25.0}/webchanges/command.py RENAMED Viewed

@@ -11,6 +11,7 @@ import importlib.metadata
 import logging
 import os
 import platform
+import re
 import shutil
 import sqlite3
 import subprocess  # noqa: S404 Consider possible security implications associated with the subprocess module.
@@ -21,6 +22,7 @@ from concurrent.futures import ThreadPoolExecutor
 from datetime import datetime
 from pathlib import Path
 from typing import Iterable, Iterator, Optional, TYPE_CHECKING, Union
+from urllib.parse import unquote_plus
 from zoneinfo import ZoneInfo
 from webchanges import __docs_url__, __project_name__, __version__
@@ -345,22 +347,29 @@ class UrlwatchCommand:
                     pass
         return 0
-    def list_jobs(self) -> None:
+    def list_jobs(self, regex: Union[bool, str]) -> None:
         """
         Lists the job and their respective _index_number.
         :return: None.
         """
+        if isinstance(regex, str):
+            print(f"List of jobs matching the RegEx '{regex}':")
+        else:
+            print('List of jobs:')
         for job in self.urlwatcher.jobs:
             if self.urlwatch_config.verbose:
-                print(f'{job.index_number:3}: {job!r}')
+                job_desc = f'{job.index_number:3}: {job!r}'
             else:
                 pretty_name = job.pretty_name()
                 location = job.get_location()
                 if pretty_name != location:
-                    print(f'{job.index_number:3}: {pretty_name} ({location})')
+                    job_desc = f'{job.index_number:3}: {pretty_name} ({location})'
                 else:
-                    print(f'{job.index_number:3}: {pretty_name}')
+                    job_desc = f'{job.index_number:3}: {pretty_name}'
+            if isinstance(regex, bool) or re.findall(regex, job_desc):
+                print(job_desc)
         if len(self.urlwatch_config.jobs_files) > 1:
             jobs_files = ['Jobs files concatenated:'] + [f'• {file}' for file in self.urlwatch_config.jobs_files]
         elif len(self.urlwatch_config.jobs_files) == 1:
@@ -377,13 +386,17 @@ class UrlwatchCommand:
         :return: The matching JobBase.
         :raises IndexError: If job is not found.
         """
-        try:
-            index = int(query)
-        except ValueError:
+        if isinstance(query, int):
+            index = query
+        else:
             try:
-                return next((job for job in self.urlwatcher.jobs if job.get_location() == query))
-            except StopIteration as e:
-                raise ValueError(f"Job {query} does not match any job's url/user_visible_url or command.") from e
+                index = int(query)
+            except ValueError:
+                query = unquote_plus(query)
+                try:
+                    return next((job for job in self.urlwatcher.jobs if unquote_plus(job.get_location()) == query))
+                except StopIteration:
+                    raise ValueError(f"Job {query} does not match any job's url/user_visible_url or command.") from None
         if index == 0:
             raise ValueError(f'Job index {index} out of range.')
@@ -422,15 +435,15 @@ class UrlwatchCommand:
             message = [f'No syntax errors in config file {self.urlwatch_config.config_file}']
             conj = ',\n' if 'hooks' in sys.modules else '\nand '
             if len(self.urlwatch_config.jobs_files) == 1:
-                message.append(f'{conj}jobs file {self.urlwatch_config.jobs_files[0]}')
+                message.append(f'{conj}jobs file {self.urlwatch_config.jobs_files[0]},')
             else:
                 message.append(
                     '\n   '.join(
-                        [f'{conj}jobs files'] + [f'• {file}' for file in sorted(self.urlwatch_config.jobs_files)]
+                        [f'{conj}jobs files'] + [f'• {file},' for file in sorted(self.urlwatch_config.jobs_files)]
                     )
                 )
             if 'hooks' in sys.modules:
-                message.append(f",\nand hooks file {sys.modules['hooks'].__file__}")
+                message.append(f"\nand hooks file {sys.modules['hooks'].__file__}")
             print(f"{''.join(message)}.")
             return
@@ -491,10 +504,6 @@ class UrlwatchCommand:
         job = self._find_job_with_defaults(job_id)
-        # TODO: The below is a hack; must find whether data is markdown programmatically (e.g. save it in database)
-        if job.filter:
-            job.is_markdown = any('html2text' in filter_type for filter_type in job.filter)
         history_data = self.urlwatcher.ssdb_storage.get_history_snapshots(job.get_guid())
         num_snapshots = len(history_data)
@@ -531,8 +540,6 @@ class UrlwatchCommand:
                         job_state.old_etag = history_dic_snapshots[close_matches[0]].etag
                         job_state.old_mime_type = history_dic_snapshots[close_matches[0]].mime_type
-                # TODO: setting of job_state.job.is_markdown = True when it had been set by a filter.
-                # Ideally it should be saved as an attribute when saving "data".
                 if self.urlwatch_config.test_reporter is None:
                     self.urlwatch_config.test_reporter = 'stdout'  # default
                 report.job_states = []  # required
@@ -647,7 +654,7 @@ class UrlwatchCommand:
             jobs_files = [f'in jobs file {self.urlwatch_config.jobs_files[0]}:']
         else:
             jobs_files = ['in the concatenation of the jobs files'] + [
-                f'• {file}' for file in self.urlwatch_config.jobs_files
+                f'• {file},' for file in self.urlwatch_config.jobs_files
             ]
         header = '\n   '.join(['Jobs with errors or returning no data (after unmodified filters, if any)'] + jobs_files)
@@ -1038,7 +1045,7 @@ class UrlwatchCommand:
     def handle_actions(self) -> None:
         """Handles the actions for command line arguments and exits."""
         if self.urlwatch_config.list_jobs:
-            self.list_jobs()
+            self.list_jobs(self.urlwatch_config.list_jobs)
             self._exit(0)
         if self.urlwatch_config.errors:

{webchanges-3.24.0 → webchanges-3.25.0}/webchanges/config.py RENAMED Viewed

@@ -33,10 +33,10 @@ class CommandConfig(BaseConfig):
     """Command line arguments configuration; the arguments are stored as class attributes."""
     add: Optional[str]
-    change_location: tuple[Union[int, str], str]
+    change_location: Optional[tuple[Union[int, str], str]]
     check_new: bool
-    clean_database: int
-    database_engine: str
+    clean_database: Optional[int]
+    database_engine: Optional[str]
     delete: Optional[str]
     delete_snapshot: Optional[str]
     detailed_versions: bool
@@ -44,21 +44,22 @@ class CommandConfig(BaseConfig):
     edit: bool
     edit_config: bool
     edit_hooks: bool
-    errors: str
+    errors: Optional[str]
     features: bool
     footnote: Optional[str]
-    gc_database: int
+    gc_database: Optional[int]
     install_chrome: bool
     joblist: list[str]
-    list_jobs: bool
-    max_snapshots: int
+    jobs_files: list[Path]
+    list_jobs: Optional[Union[bool, str]]
+    max_snapshots: Optional[int]
     max_workers: Optional[int]
     no_headless: bool
     rollback_database: Optional[str]
     smtp_login: bool
     telegram_chats: bool
     test_differ: Optional[list[str]]
-    test_job: Union[bool, Optional[str]]
+    test_job: Optional[Union[bool, str]]
     test_reporter: Optional[str]
     verbose: Optional[int]
     xmpp_login: bool
@@ -82,8 +83,8 @@ class CommandConfig(BaseConfig):
            snapshots are stored.
         """
         super().__init__(config_path, config_file, jobs_def_file, hooks_file, ssdb_file)
-        self.jobs_files = [jobs_def_file]
         self.parse_args(args)
+        self.jobs_files = self.jobs_files or [jobs_def_file]
     class CustomHelpFormatter(argparse.RawDescriptionHelpFormatter):
         def __init__(self, prog: str) -> None:
@@ -114,10 +115,7 @@ class CommandConfig(BaseConfig):
         parser.add_argument(
             'joblist',
             nargs='*',
-            help=(
-                'JOB(S) to run (if one, index as per --list or URL/command, if multiple, by index) (default: run all '
-                'jobs)'
-            ),
+            help=('JOB(S) to run (index number(s) as per --list; if one also URL/command) (default: run all jobs)'),
             metavar='JOB(S)',
         )
         parser.add_argument(
@@ -135,11 +133,12 @@ class CommandConfig(BaseConfig):
         group.add_argument(
             '--jobs',
             '--urls',
-            default=self.jobs_def_file,
+            action='append',
+            # default=[self.jobs_def_file],
             type=Path,
             help='read job list (URLs/commands) from FILE or files matching a glob pattern',
             metavar='FILE',
-            dest='jobs_def_file',
+            dest='jobs_files',
         )
         group.add_argument(
             '--config',
@@ -170,8 +169,11 @@ class CommandConfig(BaseConfig):
         group = parser.add_argument_group('job management')
         group.add_argument(
             '--list-jobs',
-            action='store_true',
-            help='list jobs and their index number',
+            nargs='?',
+            const=True,
+            help='list jobs and their index number (optional: only those who match REGEX)',
+            metavar='REGEX',
+            dest='list_jobs',
         )
         group.add_argument(
             '--errors',

webchanges 3.24.0__tar.gz → 3.25.0__tar.gz

webchanges 3.24.0__tar.gz → 3.25.0__tar.gz