webchanges 3.27.0.tar.gz → 3.28.1.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. {webchanges-3.27.0/webchanges.egg-info → webchanges-3.28.1}/PKG-INFO +3 -4
  2. {webchanges-3.27.0 → webchanges-3.28.1}/pyproject.toml +1 -2
  3. {webchanges-3.27.0 → webchanges-3.28.1}/requirements.txt +1 -0
  4. {webchanges-3.27.0 → webchanges-3.28.1}/webchanges/__init__.py +1 -1
  5. {webchanges-3.27.0 → webchanges-3.28.1}/webchanges/command.py +4 -4
  6. {webchanges-3.27.0 → webchanges-3.28.1}/webchanges/config.py +2 -2
  7. {webchanges-3.27.0 → webchanges-3.28.1}/webchanges/differs.py +38 -13
  8. {webchanges-3.27.0 → webchanges-3.28.1}/webchanges/handler.py +12 -5
  9. {webchanges-3.27.0 → webchanges-3.28.1}/webchanges/jobs.py +95 -66
  10. {webchanges-3.27.0 → webchanges-3.28.1}/webchanges/reporters.py +6 -6
  11. {webchanges-3.27.0 → webchanges-3.28.1}/webchanges/storage.py +36 -11
  12. {webchanges-3.27.0 → webchanges-3.28.1}/webchanges/worker.py +3 -3
  13. {webchanges-3.27.0 → webchanges-3.28.1/webchanges.egg-info}/PKG-INFO +3 -4
  14. {webchanges-3.27.0 → webchanges-3.28.1}/webchanges.egg-info/requires.txt +2 -4
  15. {webchanges-3.27.0 → webchanges-3.28.1}/LICENSE +0 -0
  16. {webchanges-3.27.0 → webchanges-3.28.1}/MANIFEST.in +0 -0
  17. {webchanges-3.27.0 → webchanges-3.28.1}/README.rst +0 -0
  18. {webchanges-3.27.0 → webchanges-3.28.1}/setup.cfg +0 -0
  19. {webchanges-3.27.0 → webchanges-3.28.1}/webchanges/_vendored/__init__.py +0 -0
  20. {webchanges-3.27.0 → webchanges-3.28.1}/webchanges/_vendored/headers.py +0 -0
  21. {webchanges-3.27.0 → webchanges-3.28.1}/webchanges/_vendored/packaging_version.py +0 -0
  22. {webchanges-3.27.0 → webchanges-3.28.1}/webchanges/cli.py +0 -0
  23. {webchanges-3.27.0 → webchanges-3.28.1}/webchanges/filters.py +0 -0
  24. {webchanges-3.27.0 → webchanges-3.28.1}/webchanges/mailer.py +0 -0
  25. {webchanges-3.27.0 → webchanges-3.28.1}/webchanges/main.py +0 -0
  26. {webchanges-3.27.0 → webchanges-3.28.1}/webchanges/py.typed +0 -0
  27. {webchanges-3.27.0 → webchanges-3.28.1}/webchanges/storage_minidb.py +0 -0
  28. {webchanges-3.27.0 → webchanges-3.28.1}/webchanges/util.py +0 -0
  29. {webchanges-3.27.0 → webchanges-3.28.1}/webchanges.egg-info/SOURCES.txt +0 -0
  30. {webchanges-3.27.0 → webchanges-3.28.1}/webchanges.egg-info/dependency_links.txt +0 -0
  31. {webchanges-3.27.0 → webchanges-3.28.1}/webchanges.egg-info/entry_points.txt +0 -0
  32. {webchanges-3.27.0 → webchanges-3.28.1}/webchanges.egg-info/top_level.txt +0 -0
{webchanges-3.27.0/webchanges.egg-info → webchanges-3.28.1}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: webchanges
-Version: 3.27.0
+Version: 3.28.1
 Summary: Web Changes Delivered. AI-Summarized. Totally Anonymous.
 Author-email: Mike Borsetti <mike+webchanges@borsetti.com>
 Maintainer-email: Mike Borsetti <mike+webchanges@borsetti.com>
@@ -114,6 +114,7 @@ Requires-Dist: msgpack
 Requires-Dist: platformdirs
 Requires-Dist: pyyaml
 Requires-Dist: tzdata; sys_platform == "win32"
+Requires-Dist: zstandard
 Provides-Extra: use-browser
 Requires-Dist: playwright; extra == "use-browser"
 Requires-Dist: psutil; extra == "use-browser"
@@ -160,10 +161,8 @@ Provides-Extra: requests
 Requires-Dist: requests; extra == "requests"
 Provides-Extra: safe-password
 Requires-Dist: keyring; extra == "safe-password"
-Provides-Extra: zstd
-Requires-Dist: zstandard; extra == "zstd"
 Provides-Extra: all
-Requires-Dist: webchanges[beautify,bs4,deepdiff_xml,html5lib,ical2text,imagediff,jq,matrix,ocr,pdf2text,pushbullet,pushover,pypdf_crypto,redis,requests,safe_password,use_browser,xmpp,zstd]; extra == "all"
+Requires-Dist: webchanges[beautify,bs4,deepdiff_xml,html5lib,ical2text,imagediff,jq,matrix,ocr,pdf2text,pushbullet,pushover,pypdf_crypto,redis,requests,safe_password,use_browser,xmpp]; extra == "all"
 
 .. role:: underline
    :class: underline
{webchanges-3.27.0 → webchanges-3.28.1}/pyproject.toml
@@ -90,9 +90,8 @@ xmpp = ['aioxmpp']
 redis = ['redis']
 requests = ['requests']
 safe_password = ['keyring']
-zstd = ['zstandard']
 all = [
-    'webchanges[use_browser,beautify,bs4,html5lib,ical2text,jq,ocr,pdf2text,pypdf_crypto,deepdiff_xml,imagediff,matrix,pushbullet,pushover,xmpp,redis,requests,safe_password,zstd]'
+    'webchanges[use_browser,beautify,bs4,html5lib,ical2text,jq,ocr,pdf2text,pypdf_crypto,deepdiff_xml,imagediff,matrix,pushbullet,pushover,xmpp,redis,requests,safe_password]'
 ]
 
 
{webchanges-3.27.0 → webchanges-3.28.1}/requirements.txt
@@ -9,3 +9,4 @@ msgpack
 platformdirs
 pyyaml
 tzdata; sys_platform == "win32"
+zstandard
{webchanges-3.27.0 → webchanges-3.28.1}/webchanges/__init__.py
@@ -22,7 +22,7 @@ __project_name__ = __package__
 # * MINOR version when you add functionality in a backwards compatible manner, and
 # * MICRO or PATCH version when you make backwards compatible bug fixes. We no longer use '0'
 # If unsure on increments, use pkg_resources.parse_version to parse
-__version__ = '3.27.0'
+__version__ = '3.28.1'
 __description__ = (
     'Check web (or command output) for changes since last run and notify.\n'
     '\n'
{webchanges-3.27.0 → webchanges-3.28.1}/webchanges/command.py
@@ -482,16 +482,16 @@ class UrlwatchCommand:
         """
         Runs jobs that have no history to populate the snapshot database when they're newly added.
         """
-        new_jobs = []
+        new_jobs = set()
         for idx, job in enumerate(self.urlwatcher.jobs):
             has_history = bool(self.urlwatcher.ssdb_storage.get_history_snapshots(job.get_guid()))
             if not has_history:
                 print(f'Adding new {job.get_indexed_location()}')
-                new_jobs.append(idx + 1)
-        if not new_jobs:
+                new_jobs.add(idx + 1)
+        if not new_jobs and not self.urlwatch_config.joblist:
             print('Found no new jobs to run.')
             return
-        self.urlwatcher.urlwatch_config.joblist = new_jobs
+        self.urlwatcher.urlwatch_config.joblist = set(self.urlwatcher.urlwatch_config.joblist).union(new_jobs)
         self.urlwatcher.run_jobs()
         self.urlwatcher.close()
         return
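
Note: switching new_jobs from a list to a set lets the "run newly added jobs" pass be combined with jobs explicitly selected on the command line instead of overwriting them. A minimal sketch of the new selection semantics (hypothetical job index numbers, illustrative only):

    new_jobs = {2, 5}  # jobs found with no stored snapshot history
    joblist = {3}      # jobs the user explicitly requested
    joblist = set(joblist).union(new_jobs)  # -> {2, 3, 5}; all three are run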
{webchanges-3.27.0 → webchanges-3.28.1}/webchanges/config.py
@@ -11,7 +11,7 @@ import textwrap
 # import os
 from dataclasses import dataclass, field
 from pathlib import Path
-from typing import Sequence
+from typing import Collection
 
 from webchanges import __doc__ as doc
 from webchanges import __docs_url__, __project_name__, __version__
@@ -51,7 +51,7 @@ class CommandConfig(BaseConfig):
     gc_database: int | None
     hooks_files: list[Path]
     install_chrome: bool
-    joblist: Sequence[str | int]
+    joblist: Collection[str | int]
     jobs_files: list[Path]
     list_jobs: bool | str | None
     log_file: Path
{webchanges-3.27.0 → webchanges-3.28.1}/webchanges/differs.py
@@ -71,6 +71,7 @@ except ImportError as e:  # pragma: no cover
 # https://stackoverflow.com/questions/39740632
 if TYPE_CHECKING:
     from webchanges.handler import JobState
+    from webchanges.storage import _Config
 
 
 logger = logging.getLogger(__name__)
@@ -123,7 +124,7 @@ class DifferBase(metaclass=TrackSubClasses):
         """
         result: list[str] = []
         for sc in TrackSubClasses.sorted_by_kind(cls):
-            # default_subdirective = getattr(sc, '__default_subdirective__', None)
+            # default_directive = getattr(sc, '__default_directive__', None)
             result.extend((f' * {sc.__kind__} - {sc.__doc__}',))
             if hasattr(sc, '__supported_directives__'):
                 for key, doc in sc.__supported_directives__.items():
@@ -136,30 +137,56 @@ class DifferBase(metaclass=TrackSubClasses):
         cls,
         differ_spec: dict[str, Any] | None,
         job_index_number: int | None = None,
+        config: _Config | None = None,
     ) -> tuple[str, dict[str, Any]]:
         """Checks the differ_spec for its validity and applies default values.
 
         :param differ_spec: The differ as entered by the user; use "unified" if empty.
         :param job_index_number: The job index number.
-        :returns: A validated differ_kind, subdirectives (where subdirectives is a dict).
+        :returns: A validated differ_kind, directives tuple.
         """
+
+        def directives_with_defaults(
+            differ_kind: str, directives: dict[str, Any], config: _Config | None = None
+        ) -> dict[str, Any]:
+            """Obtain differ directives that also contain defaults from the configuration.
+
+            :param differ_kind: The differ kind.
+            :param directives: The differ directives as stated in the job.
+            :returns: directives inclusive of configuration defaults.
+            """
+            if config is None:
+                logger.error('Cannot merge differ directives with defaults as no config object was passed')
+                return directives
+            cfg = config.get('differ_defaults')
+            if isinstance(cfg, dict):
+                defaults: dict[str, Any] = cfg.get(differ_kind)  # type: ignore[assignment]
+                if defaults:
+                    for key, value in defaults.items():
+                        if key not in directives:
+                            directives[key] = value
+            return directives
+
         differ_spec = differ_spec or {'name': 'unified'}
-        subdirectives = differ_spec.copy()
-        differ_kind = subdirectives.pop('name', '')
+        directives = differ_spec.copy()
+        differ_kind = directives.pop('name', '')
         if not differ_kind:
-            if list(subdirectives.keys()) == ['command']:
+            if list(directives.keys()) == ['command']:
                 differ_kind = 'command'
             else:
                 raise ValueError(
                     f"Job {job_index_number}: Differ directive must have a 'name' sub-directive: {differ_spec}."
                 )
 
-        differcls = cls.__subclasses__.get(differ_kind, None)
+        differcls: DifferBase | None = cls.__subclasses__.get(differ_kind, None)  # type: ignore[assignment]
         if not differcls:
             raise ValueError(f'Job {job_index_number}: No differ named {differ_kind}.')
 
+        if directives:
+            directives = directives_with_defaults(differ_kind, directives, config)
+
         if hasattr(differcls, '__supported_directives__'):
-            provided_keys = set(subdirectives.keys())
+            provided_keys = set(directives.keys())
             allowed_keys = set(differcls.__supported_directives__.keys())
             unknown_keys = provided_keys.difference(allowed_keys)
             if unknown_keys and '<any>' not in allowed_keys:
@@ -168,7 +195,7 @@ class DifferBase(metaclass=TrackSubClasses):
                     f"{', '.join(unknown_keys)} (supported: {', '.join(sorted(allowed_keys))})."
                 )
 
-        return differ_kind, subdirectives
+        return differ_kind, directives
 
     @classmethod
     def process(
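
Note: the new config argument feeds per-differ defaults from the configuration file into each job's differ directives; job-level values always win. A minimal sketch of the merge rule that directives_with_defaults applies (hypothetical values, illustrative only):

    config = {'differ_defaults': {'ai_google': {'model': 'gemini-2.0-flash'}}}
    directives = {'timeout': 60}  # as stated in the job
    defaults = config['differ_defaults'].get('ai_google') or {}
    for key, value in defaults.items():
        if key not in directives:  # a job directive is never overwritten
            directives[key] = value
    # directives == {'timeout': 60, 'model': 'gemini-2.0-flash'}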
@@ -1246,9 +1273,7 @@ class AIGoogleDiffer(DifferBase):
     __kind__ = 'ai_google'
 
     __supported_directives__ = {
-        'model': (
-            'model name from https://ai.google.dev/gemini-api/docs/models/gemini (default: gemini-1.5-flash-latest)'
-        ),
+        'model': ('model name from https://ai.google.dev/gemini-api/docs/models/gemini (default: gemini-2.0-flash)'),
         'system_instructions': (
             'Optional tone and style instructions for the model (default: see documentation at'
             'https://webchanges.readthedocs.io/en/stable/differs.html#ai-google-diff)'
@@ -1264,7 +1289,7 @@ class AIGoogleDiffer(DifferBase):
         'tools': "data passed on to the API's 'tools' field (default: None)",
         'unified': 'directives passed to the unified differ (default: None)',
     }
-    __default_subdirective__ = 'model'
+    __default_directive__ = 'model'
 
     @staticmethod
     def _send_to_model(
@@ -1279,7 +1304,7 @@ class AIGoogleDiffer(DifferBase):
         if directives is None:
             directives = {}
         if 'model' not in directives:
-            directives['model'] = 'gemini-1.5-pro'  # also for footer
+            directives['model'] = 'gemini-2.0-flash'  # also for footer
         model = directives.get('model')
         timeout = directives.get('timeout', 300)
         max_output_tokens = directives.get('max_output_tokens')
{webchanges-3.27.0 → webchanges-3.28.1}/webchanges/handler.py
@@ -238,7 +238,7 @@ class JobState(ContextManager):
         filtered_data, mime_type = FilterBase.auto_process(self, data, mime_type)
 
         # Apply any specified filters
-        for filter_kind, subfilter in FilterBase.normalize_filter_list(self.job.filter, self.job.index_number):
+        for filter_kind, subfilter in FilterBase.normalize_filter_list(self.job.filters, self.job.index_number):
             filtered_data, mime_type = FilterBase.process(
                 filter_kind, subfilter, self, filtered_data, mime_type
             )
@@ -293,6 +293,7 @@ class JobState(ContextManager):
         report_kind: Literal['text', 'markdown', 'html'] = 'text',
         differ: dict[str, Any] | None = None,
         tz: ZoneInfo | None = None,
+        config: _Config | None = None,
     ) -> str:
         """Generates the job's diff and applies diff_filters to it (if any). Memoized.
 
@@ -305,14 +306,20 @@ class JobState(ContextManager):
             return self.generated_diff[report_kind]
 
         if report_kind not in self.unfiltered_diff:
-            differ_kind, subdiffer = DifferBase.normalize_differ(differ or self.job.differ, self.job.index_number)
+            differ_kind, subdiffer = DifferBase.normalize_differ(
+                differ or self.job.differ,
+                self.job.index_number,
+                config,
+            )
             unfiltered_diff = DifferBase.process(differ_kind, subdiffer, self, report_kind, tz, self.unfiltered_diff)
             self.unfiltered_diff.update(unfiltered_diff)
         _generated_diff = self.unfiltered_diff[report_kind]
         if _generated_diff:
             # Apply any specified diff_filters
             _mime_type = 'text/plain'
-            for filter_kind, subfilter in FilterBase.normalize_filter_list(self.job.diff_filter, self.job.index_number):
+            for filter_kind, subfilter in FilterBase.normalize_filter_list(
+                self.job.diff_filters, self.job.index_number
+            ):
                 _generated_diff, _mime_type = FilterBase.process(  # type: ignore[assignment]
                     filter_kind, subfilter, self, _generated_diff, _mime_type
                 )
@@ -363,7 +370,7 @@ class Report:
 
         :param job_state: The JobState object with the information of the job run.
         """
-        if job_state.exception is not None and job_state.exception is not NotModifiedError:
+        if job_state.exception is not None and not isinstance(job_state.exception, NotModifiedError):
             logger.info(
                 f'Job {job_state.job.index_number}: Got exception while processing job {job_state.job}',
                 exc_info=job_state.exception,
@@ -460,7 +467,7 @@ class Report:
         if (
             job_state.verb == 'changed'
             and not self.config['display']['empty-diff']
-            and job_state.get_diff(tz=self.tz) == ''
+            and job_state.get_diff(tz=self.tz, config=self.config) == ''
         ):
             return True

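Note: the isinstance() change fixes a real bug: the old code compared an exception instance to the exception class with `is not`, which is always true, so NotModifiedError was logged like any other exception. An illustration (assuming NotModifiedError can be instantiated without arguments):

    exc = NotModifiedError()
    exc is not NotModifiedError            # True  -> old check always logged
    not isinstance(exc, NotModifiedError)  # False -> new check skips logging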
{webchanges-3.27.0 → webchanges-3.28.1}/webchanges/jobs.py
@@ -130,7 +130,6 @@ class JobBase(metaclass=TrackSubClasses):
     _delay: float | None = None
     additions_only: bool | None = None
     block_elements: list[str] | None = None
-    chromium_revision: dict[str, int] | dict[str, str] | str | int | None = None  # deprecated
     compared_versions: int | None = None
     contextlines: int | None = None
     cookies: dict[str, str] | None = None
@@ -138,15 +137,13 @@ class JobBase(metaclass=TrackSubClasses):
     data_as_json: bool | None = None
     deletions_only: bool | None = None
     differ: dict[str, Any] | None = None  # added in 3.21
-    diff_filter: str | list[str | dict[str, Any]] | None = None
+    diff_filters: str | list[str | dict[str, Any]] | None = None
     diff_tool: str | None = None  # deprecated in 3.21
     enabled: bool | None = None
     encoding: str | None = None
-    filter: str | list[str | dict[str, Any]] | None = None
+    filters: str | list[str | dict[str, Any]] | None = None
     headers = Headers(encoding='utf-8')
     http_client: Literal['httpx', 'requests'] | None = None
-    http_proxy: str | None = None
-    https_proxy: str | None = None
     ignore_cached: bool | None = None
     ignore_connection_errors: bool | None = None
     ignore_default_args: bool | str | list[str] | None = None
@@ -172,6 +169,7 @@ class JobBase(metaclass=TrackSubClasses):
     no_redirects: bool | None = None
     note: str | None = None
     params: str | list | dict | None = None
+    proxy: str | None = None
     referer: str | None = None  # Playwright
     retries: int | None = None
     ssl_no_verify: bool | None = None
@@ -191,6 +189,23 @@ class JobBase(metaclass=TrackSubClasses):
     wait_until: Literal['commit', 'domcontentloaded', 'load', 'networkidle'] | None = None
 
     def __init__(self, **kwargs: Any) -> None:
+        # backward-compatibility
+        if 'filter' in kwargs:
+            logger.info(f"Job {kwargs.get('index_number')}: Replacing deprecated directive 'filter' with 'filters'")
+            kwargs['filters'] = kwargs.pop('filter')
+        if 'diff_filter' in kwargs:
+            logger.info(
+                f"Job {kwargs.get('index_number')}: Replacing deprecated directive 'diff_filter' with 'diff_filters'"
+            )
+            kwargs['diff_filters'] = kwargs.pop('diff_filter')
+        if 'https_proxy' in kwargs:
+            logger.info(f"Job {kwargs.get('index_number')}: Replacing deprecated directive 'https_proxy' with 'proxy'")
+            kwargs['proxy'] = kwargs.pop('https_proxy')
+            kwargs.pop('http_proxy', None)
+        elif 'http_proxy' in kwargs:
+            logger.info(f"Job {kwargs.get('index_number')}: Replacing deprecated directive 'http_proxy' with 'proxy'")
+            kwargs['proxy'] = kwargs.pop('http_proxy')
+
         # Fail if any required keys are not provided
         for k in self.__required__:
             if k not in kwargs:
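
Note: the renamed directives are normalized transparently at construction time, so existing jobs files keep working. An illustrative sketch of the normalization (hypothetical kwargs):

    kwargs = {'url': 'https://example.net/', 'filter': [{'css': 'div#main'}]}
    if 'filter' in kwargs:
        kwargs['filters'] = kwargs.pop('filter')
    # kwargs == {'url': 'https://example.net/', 'filters': [{'css': 'div#main'}]}

The same pattern maps diff_filter to diff_filters, and http_proxy/https_proxy to the single proxy directive (https_proxy wins when both are present).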
@@ -348,7 +363,7 @@ class JobBase(metaclass=TrackSubClasses):
         return job
 
     def to_dict(self) -> dict:
-        """Return all definte (not None) Job object directives, required and optional, as a serializable dict,
+        """Return all defined (not None) Job object directives, required and optional, as a serializable dict,
         converting Headers object (which are not JSON serializable) to dicts.
 
         :returns: A dict with all job directives as keys, ignoring those that are extras.
@@ -369,7 +384,8 @@ class JobBase(metaclass=TrackSubClasses):
         :returns: A JobBase type object.
         """
         for k in data.keys():
-            if k not in cls.__required__ + cls.__optional__:
+            # backward-compatibility
+            if k not in cls.__required__ + cls.__optional__ + ('filter', 'diff_filter', 'http_client', 'http_proxy'):
                 if len(filenames) > 1:
                     jobs_files = ['in the concatenation of the jobs files:'] + [f'• {file},' for file in filenames]
                 elif len(filenames) == 1:
@@ -535,6 +551,22 @@ class JobBase(metaclass=TrackSubClasses):
         if self.monospace is None:
             self.monospace = True
 
+    def get_proxy(self) -> str | None:
+        """Check that URL is http or https and return proxy value."""
+        scheme = urlsplit(self.url).scheme
+        if scheme not in {'http', 'https'}:
+            raise ValueError(
+                f'Job {self.index_number}: URL should start with https:// or http:// (check for typos): {self.url}'
+            )
+        proxy = self.proxy
+        if proxy is None:
+            if os.getenv((scheme + '_proxy').upper()):
+                proxy = os.getenv((scheme + '_proxy').upper())
+                logger.debug(
+                    f"Job {self.index_number}: Setting proxy from environment variable {(scheme + '_proxy').upper()}"
+                )
+        return proxy
+
 
 class Job(JobBase):
     """Job class for jobs."""
@@ -546,10 +578,10 @@ class Job(JobBase):
         'contextlines',
         'deletions_only',
         'differ',
-        'diff_filter',
+        'diff_filters',
         'diff_tool',  # deprecated in 3.21
         'enabled',
-        'filter',
+        'filters',
         'index_number',
         'is_markdown',
         'kind',  # hooks.py
@@ -672,14 +704,12 @@ class UrlJob(UrlJobBase):
         'data_as_json',
         'encoding',
         'headers',
-        'http_client',
-        'http_proxy',
-        'https_proxy',
         'ignore_cached',
         'ignore_dh_key_too_small',
         'method',
         'no_redirects',
         'params',
+        'proxy',
         'retries',
         'ssl_no_verify',
         'timeout',
@@ -716,17 +746,10 @@ class UrlJob(UrlJobBase):
                 f'Job {self.index_number}: Using the HTTPX HTTP client library (HTTP/2 support is not available since '
                 f'h2 is not installed)'
             )
-        proxy: str | None = None
-        scheme = urlsplit(self.url).scheme
-        if scheme not in {'http', 'https'}:
-            raise ValueError(
-                f'Job {self.index_number}: URL should start with https:// or http:// (check for typos): {self.url}'
-            )
-        if getattr(self, scheme + '_proxy'):
-            proxy = getattr(self, scheme + '_proxy')
-        elif os.getenv((scheme + '_proxy').upper()):
-            proxy = os.getenv((scheme + '_proxy').upper())
-        logger.debug(f'Job {self.index_number}: Proxies: {proxy}')
+
+        proxy = self.get_proxy()
+        if proxy is not None:
+            logger.debug(f'Job {self.index_number}: Proxy: {proxy}')
 
         if self.ignore_dh_key_too_small:
             logger.debug(
@@ -795,7 +818,7 @@ class UrlJob(UrlJobBase):
         etag = response.headers.get('ETag', '')
         mime_type = response.headers.get('Content-Type', '').split(';')[0]
 
-        if FilterBase.filter_chain_needs_bytes(self.filter):
+        if FilterBase.filter_chain_needs_bytes(self.filters):
             return response.content, etag, mime_type
 
         if self.encoding:
@@ -812,13 +835,13 @@ class UrlJob(UrlJobBase):
         :raises NotModifiedError: If an HTTP 304 response is received.
         """
         logger.info(f'Job {self.index_number}: Using the requests HTTP client library')
-        proxies = None
-        scheme = urlsplit(self.url).scheme
-        if getattr(self, scheme + '_proxy'):
-            proxies = {scheme: getattr(self, scheme + '_proxy')}
-        elif os.getenv((scheme + '_proxy').upper()):
-            proxies = {scheme: os.getenv((scheme + '_proxy').upper())}
-        logger.debug(f'Job {self.index_number}: Proxies: {proxies}')
+        proxy_str = self.get_proxy()
+        if proxy_str is not None:
+            scheme = urlsplit(self.url).scheme
+            proxies = {scheme: proxy_str}
+            logger.debug(f'Job {self.index_number}: Proxies: {proxies}')
+        else:
+            proxies = None
 
         if self.ssl_no_verify:
             # required to suppress warnings with 'ssl_no_verify: true'
@@ -901,7 +924,7 @@ class UrlJob(UrlJobBase):
         etag = response.headers.get('ETag', '')
         mime_type = response.headers.get('Content-Type', '').split(';')[0]
 
-        if FilterBase.filter_chain_needs_bytes(self.filter):
+        if FilterBase.filter_chain_needs_bytes(self.filters):
             return response.content, etag, mime_type
 
         if self.encoding:
@@ -939,7 +962,7 @@ class UrlJob(UrlJobBase):
         else:
             filename = Path(str(urlparse(self.url).path))
 
-        if FilterBase.filter_chain_needs_bytes(self.filter):
+        if FilterBase.filter_chain_needs_bytes(self.filters):
             return filename.read_bytes(), '', 'application/octet-stream'
         else:
             return filename.read_text(), '', 'text/plain'
@@ -955,7 +978,7 @@ class UrlJob(UrlJobBase):
             str(password),
             timeout=self.timeout,
         ) as ftp:
-            if FilterBase.filter_chain_needs_bytes(self.filter):
+            if FilterBase.filter_chain_needs_bytes(self.filters):
                 data_bytes = b''
 
                 def callback_bytes(dt: bytes) -> None:
@@ -1020,14 +1043,27 @@ class UrlJob(UrlJobBase):
         logger.debug(f'Job {self.index_number}: Cookies: {self.cookies}')
 
         if self.http_client == 'requests' or not httpx:
+            if isinstance(requests, str):
+                message = f'Job {job_state.job.index_number} cannot be run '
+                if self.http_client == 'requests':
+                    message += "with 'http_client: requests' "
+                message += (
+                    f'( {self.get_indexed_location()} ):\n{requests}\n'
+                    f"Please install module using e.g. 'pip install --upgrade webchanges[requests]'."
+                )
+                raise ImportError(message)
             job_state._http_client_used = 'requests'
             data, etag, mime_type = self._retrieve_requests(headers=headers, timeout=timeout)
         elif not self.http_client or self.http_client == 'httpx':
             if isinstance(httpx, str):
-                raise ImportError(
-                    f"Job {job_state.job.index_number}: Python HTTP client package 'httpx' cannot be imported; cannot "
-                    f'run job ( {self.get_indexed_location()} )\n{httpx}'
+                message = f'Job {job_state.job.index_number} cannot be run '
+                if self.http_client == 'httpx':
+                    message += "with 'http_client: httpx' "
+                message += (
+                    f'( {self.get_indexed_location()} ):\n{httpx}\n'
+                    f"Please install module using e.g. 'pip install --upgrade httpx[http2,zstd]'."
                 )
+                raise ImportError(message)
             job_state._http_client_used = 'HTTPX'
             data, etag, mime_type = self._retrieve_httpx(headers=headers, timeout=timeout)
         else:
@@ -1070,7 +1106,7 @@ class UrlJob(UrlJobBase):
         # Instead of a full traceback, just show the error
         exception_str = str(exception).strip()
         print(f'{exception_str=} {exception.args=} {type(exception)=}')
-        if (self.https_proxy or self.http_proxy) and (
+        if self.proxy and (
             (httpx and isinstance(exception, httpx.TransportError))
             or any(
                 exception_str.startswith(error_string)
@@ -1096,7 +1132,7 @@ class UrlJob(UrlJobBase):
                 )
             )
         ):
-            exception_str += f'\n\n(Job has proxy {self.https_proxy or self.http_proxy})'
+            exception_str += f'\n\n(Job has proxy {self.proxy})'
             return exception_str
         return tb
 
@@ -1160,13 +1196,10 @@ class BrowserJob(UrlJobBase):
     __required__: tuple[str, ...] = ('use_browser',)
     __optional__: tuple[str, ...] = (
         'block_elements',
-        'chromium_revision',  # deprecated
         'cookies',
         'data',
         'data_as_json',
         'headers',
-        'http_proxy',
-        'https_proxy',
         'ignore_default_args',  # Playwright
         'ignore_https_errors',
         'init_script',  # Playwright,
@@ -1175,6 +1208,7 @@ class BrowserJob(UrlJobBase):
         'method',
         'navigate',
         'params',
+        'proxy',
         'switches',
         'timeout',
         'user_data_dir',
@@ -1225,7 +1259,7 @@ class BrowserJob(UrlJobBase):
         try:
             from playwright._repo_version import version as playwright_version
             from playwright.sync_api import Error as PlaywrightError
-            from playwright.sync_api import ProxySettings, Route, sync_playwright
+            from playwright.sync_api import Route, sync_playwright
         except ImportError:  # pragma: no cover
             raise ImportError(
                 f"Python package 'playwright' is not installed; cannot run jobs with the 'use_browser: true' "
@@ -1272,26 +1306,21 @@ class BrowserJob(UrlJobBase):
 
         headers = self.get_headers(job_state, user_agent=None)
 
-        proxy: ProxySettings | None = None
-        if self.http_proxy or os.getenv('HTTP_PROXY') or self.https_proxy or os.getenv('HTTPS_PROXY'):
-            if urlsplit(self.url).scheme == 'http':
-                proxy_split: SplitResult | SplitResultBytes | None = urlsplit(
-                    self.http_proxy or os.getenv('HTTP_PROXY')
-                )
-            elif urlsplit(self.url).scheme == 'https':
-                proxy_split = urlsplit(self.https_proxy or os.getenv('HTTPS_PROXY'))
-            else:
-                proxy_split = None
-            if proxy_split:
-                proxy = {
-                    'server': (
-                        f'{proxy_split.scheme!s}://{proxy_split.hostname!s}:{proxy_split.port!s}'
-                        if proxy_split.port
-                        else ''
-                    ),
-                    'username': str(proxy_split.username),
-                    'password': str(proxy_split.password),
-                }
+        proxy_str = self.get_proxy()
+        if proxy_str is not None:
+            proxy_split: SplitResult | SplitResultBytes = urlsplit(proxy_str)
+            proxy = {
+                'server': (
+                    f'{proxy_split.scheme!s}://{proxy_split.hostname!s}:{proxy_split.port!s}'
+                    if proxy_split.port
+                    else ''
+                ),
+                'username': str(proxy_split.username),
+                'password': str(proxy_split.password),
+            }
+            logger.debug(f'Job {self.index_number}: Proxy: {proxy}')
+        else:
+            proxy = None
 
         if self.switches:
             if isinstance(self.switches, str):
@@ -1784,8 +1813,8 @@ class BrowserJob(UrlJobBase):
         """
         exception_str = f'Browser error in {str(exception).strip()}'
         print(f'{exception_str=}, {tb=}')
-        if (self.https_proxy or self.http_proxy) and 'net::ERR' in exception_str:
-            exception_str += f'\n\n(Job has proxy {self.https_proxy or self.http_proxy})'
+        if self.proxy and 'net::ERR' in exception_str:
+            exception_str += f'\n\n(Job has proxy {self.proxy})'
             return exception_str
         return exception_str
 
@@ -1936,7 +1965,7 @@ class ShellJob(Job):
         :raises subprocess.TimeoutExpired: Subclass of SubprocessError, raised when a timeout expires while waiting for
             a child process.
         """
-        needs_bytes = FilterBase.filter_chain_needs_bytes(self.filter)
+        needs_bytes = FilterBase.filter_chain_needs_bytes(self.filters)
         try:
             return (
                 subprocess.run(
{webchanges-3.27.0 → webchanges-3.28.1}/webchanges/reporters.py
@@ -466,7 +466,7 @@ class TextReporter(ReporterBase):
         location = job_state.job.get_location()
         if pretty_name != location:
             location = f'{pretty_name} ({location})'
-        yield ': '.join((job_state.verb.upper(), location))
+        yield ': '.join((job_state.verb.replace('_', ' ').upper(), location))
         if hasattr(job_state.job, 'note') and job_state.job.note:
             yield job_state.job.note
         return
@@ -549,8 +549,8 @@ class TextReporter(ReporterBase):
         location = job_state.job.get_location()
         if pretty_name != location:
             location = f'{pretty_name} ({location})'
-        pretty_summary = ': '.join((job_state.verb.upper(), pretty_name))
-        summary = ': '.join((job_state.verb.upper(), location))
+        pretty_summary = ': '.join((job_state.verb.replace('_', ' ').upper(), pretty_name))
+        summary = ': '.join((job_state.verb.replace('_', ' ').upper(), location))
         differ = job_state.job.differ or {}
         content = self._format_content(job_state, differ)
         # self._format_content may update verb to 'changed,no_report'
@@ -597,7 +597,7 @@ class MarkdownReporter(ReporterBase):
         location = job_state.job.get_location()
         if pretty_name != location:
             location = f'{pretty_name} ({location})'
-        yield f"* {': '.join((job_state.verb.upper(), location))}"
+        yield f"* {': '.join((job_state.verb.replace('_', ' ').upper(), location))}"
        if hasattr(job_state.job, 'note') and job_state.job.note:
             yield job_state.job.note
         return
@@ -812,8 +812,8 @@ class MarkdownReporter(ReporterBase):
         else:
             location = f'{pretty_name} ({location})'
 
-        pretty_summary = ': '.join((job_state.verb.upper(), pretty_name))
-        summary = ': '.join((job_state.verb.upper(), location))
+        pretty_summary = ': '.join((job_state.verb.replace('_', ' ').upper(), pretty_name))
+        summary = ': '.join((job_state.verb.replace('_', ' ').upper(), location))
         differ = job_state.job.differ or {}
         content = self._format_content(job_state, differ)  # may update verb to 'changed,no_report'
         if job_state.verb == 'changed,no_report':
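
Note: this reporter change is display-only: verbs containing underscores now render with spaces. For example (assuming a multi-word verb such as the one set by unchanged-from-error reporting):

    'unchanged_from_error'.replace('_', ' ').upper()  # 'UNCHANGED FROM ERROR'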
{webchanges-3.27.0 → webchanges-3.28.1}/webchanges/storage.py
@@ -287,6 +287,20 @@ _ConfigJobDefaults = TypedDict(
     },
     total=False,
 )
+_ConfigDifferDefaults = TypedDict(
+    '_ConfigDifferDefaults',
+    {
+        '_note': str,
+        'unified': dict[str, Any],
+        'ai_google': dict[str, Any],
+        'command': dict[str, Any],
+        'deepdiff': dict[str, Any],
+        'image': dict[str, Any],
+        'table': dict[str, Any],
+        'wdiff': dict[str, Any],
+    },
+    total=False,
+)
 _ConfigDatabase = TypedDict(
     '_ConfigDatabase',
     {
@@ -300,6 +314,7 @@ _Config = TypedDict(
         'display': _ConfigDisplay,
         'report': _ConfigReport,
         'job_defaults': _ConfigJobDefaults,
+        'differ_defaults': _ConfigDifferDefaults,
         'database': _ConfigDatabase,
         'footnote': str | None,
     },
@@ -447,6 +462,16 @@ DEFAULT_CONFIG: _Config = {
         'browser': {'_note': "These are used for 'url' jobs with 'use_browser: true'."},
         'command': {'_note': "These are used for 'command' jobs."},
     },
+    'differ_defaults': {
+        '_note': 'Default directives that are applied to individual differs.',
+        'unified': {},
+        'ai_google': {},
+        'command': {},
+        'deepdiff': {},
+        'image': {},
+        'table': {},
+        'wdiff': {},
+    },
     'database': {
         'engine': 'sqlite3',
         'max_snapshots': 4,
@@ -609,7 +634,7 @@ class JobsBaseFileStorage(BaseTextualFileStorage, ABC):
         if isinstance(job, ShellJob):
             return True
 
-        for filter_kind, subfilter in FilterBase.normalize_filter_list(job.filter, job.index_number):
+        for filter_kind, _ in FilterBase.normalize_filter_list(job.filters, job.index_number):
             if filter_kind == 'shellpipe':
                 return True
 
@@ -721,19 +746,17 @@ class YamlConfigStorage(BaseYamlFileStorage):
         :param config: The configuration.
         :raises ValueError: If the configuration has keys not in DEFAULT_CONFIG (bad keys, e.g. typos)
         """
-        for key in {'chromium_revision'}:
-            if key in config['job_defaults']['all'] or key in config['job_defaults']['browser']:
-                warnings.warn(
-                    f'Directive {key} found in the configuration file {self.filename} has been deprecated'
-                    f'with the use of Playright. Please delete it (webchanges --edit-config)',
-                    DeprecationWarning,
-                )
-
         config_for_extras = copy.deepcopy(config)
         if 'job_defaults' in config_for_extras:
             # Create missing 'job_defaults' keys from DEFAULT_CONFIG
             for key in DEFAULT_CONFIG['job_defaults']:
+                if 'job_defaults' not in config_for_extras:
+                    config_for_extras['job_defaults'] = {}
                 config_for_extras['job_defaults'][key] = None  # type: ignore[literal-required]
+            for key in DEFAULT_CONFIG['differ_defaults']:
+                if 'differ_defaults' not in config_for_extras:
+                    config_for_extras['differ_defaults'] = {}
+                config_for_extras['differ_defaults'][key] = None  # type: ignore[literal-required]
         if 'hooks' in sys.modules:
             # Remove extra keys in config used in hooks (they are not in DEFAULT_CONFIG)
             for _, obj in inspect.getmembers(
@@ -742,6 +765,8 @@ class YamlConfigStorage(BaseYamlFileStorage):
                 if issubclass(obj, JobBase):
                     if obj.__kind__ not in DEFAULT_CONFIG['job_defaults'].keys():
                         config_for_extras['job_defaults'].pop(obj.__kind__, None)  # type: ignore[misc]
+                    elif obj.__kind__ not in DEFAULT_CONFIG['job_defaults'].keys():
+                        config_for_extras['job_defaults'].pop(obj.__kind__, None)  # type: ignore[misc]
                 elif issubclass(obj, ReporterBase):
                     if obj.__kind__ not in DEFAULT_CONFIG['report'].keys():
                         config_for_extras['report'].pop(obj.__kind__, None)  # type: ignore[misc]
@@ -908,11 +933,11 @@ class YamlJobsStorage(BaseYamlFileStorage, JobsBaseFileStorage):
                     + job_files_for_error()
                 )
             )
-        if not isinstance(job.filter, (NoneType, list)):
+        if not isinstance(job.filters, (NoneType, list)):
             raise ValueError(
                 '\n '.join(
                     [
-                        f"The 'filter' key needs to contain a list; found a {type(job.filter).__name__} ",
+                        f"The 'filters' key needs to contain a list; found a {type(job.filters).__name__} ",
                         f'in {job.get_indexed_location()}',
                     ]
                     + job_files_for_error()
{webchanges-3.27.0 → webchanges-3.28.1}/webchanges/worker.py
@@ -108,7 +108,7 @@ def run_jobs(urlwatcher: Urlwatch) -> None:
         if job_state.tries > 0:
             job_state.tries = 0
             job_state.save()
-        if job_state.old_error_data:
+        if job_state.old_error_data and job_state.job.suppress_repeated_errors:
             urlwatcher.report.unchanged_from_error(job_state)
         else:
             urlwatcher.report.unchanged(job_state)
@@ -140,7 +140,7 @@ def run_jobs(urlwatcher: Urlwatch) -> None:
         if job_state.tries > 0:
             job_state.tries = 0
             job_state.save()
-        if job_state.old_error_data:
+        if job_state.old_error_data and job_state.job.suppress_repeated_errors:
             urlwatcher.report.unchanged_from_error(job_state)
         else:
             urlwatcher.report.unchanged(job_state)
@@ -195,7 +195,7 @@ def run_jobs(urlwatcher: Urlwatch) -> None:
     if urlwatcher.urlwatch_config.max_workers:
         max_workers = urlwatcher.urlwatch_config.max_workers
     else:
-        max_workers = max(int(virt_mem / 200e6), 1)
+        max_workers = max(int(virt_mem / 400e6), 1)
     max_workers = min(max_workers, os.cpu_count() or 1)
     logger.debug(
         f"Running jobs that require Chrome (i.e. with 'use_browser: true') in parallel with {max_workers} "
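
Note: the divisor change halves the default browser concurrency, budgeting roughly 400 MB of virtual memory per Chrome worker instead of 200 MB. A worked example (assuming 8 GB of free virtual memory and 8 CPUs):

    virt_mem = 8e9
    max_workers = max(int(virt_mem / 400e6), 1)  # 20
    max_workers = min(max_workers, 8)            # capped at os.cpu_count() -> 8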
{webchanges-3.27.0 → webchanges-3.28.1/webchanges.egg-info}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: webchanges
-Version: 3.27.0
+Version: 3.28.1
 Summary: Web Changes Delivered. AI-Summarized. Totally Anonymous.
 Author-email: Mike Borsetti <mike+webchanges@borsetti.com>
 Maintainer-email: Mike Borsetti <mike+webchanges@borsetti.com>
@@ -114,6 +114,7 @@ Requires-Dist: msgpack
 Requires-Dist: platformdirs
 Requires-Dist: pyyaml
 Requires-Dist: tzdata; sys_platform == "win32"
+Requires-Dist: zstandard
 Provides-Extra: use-browser
 Requires-Dist: playwright; extra == "use-browser"
 Requires-Dist: psutil; extra == "use-browser"
@@ -160,10 +161,8 @@ Provides-Extra: requests
 Requires-Dist: requests; extra == "requests"
 Provides-Extra: safe-password
 Requires-Dist: keyring; extra == "safe-password"
-Provides-Extra: zstd
-Requires-Dist: zstandard; extra == "zstd"
 Provides-Extra: all
-Requires-Dist: webchanges[beautify,bs4,deepdiff_xml,html5lib,ical2text,imagediff,jq,matrix,ocr,pdf2text,pushbullet,pushover,pypdf_crypto,redis,requests,safe_password,use_browser,xmpp,zstd]; extra == "all"
+Requires-Dist: webchanges[beautify,bs4,deepdiff_xml,html5lib,ical2text,imagediff,jq,matrix,ocr,pdf2text,pushbullet,pushover,pypdf_crypto,redis,requests,safe_password,use_browser,xmpp]; extra == "all"
 
 .. role:: underline
    :class: underline
{webchanges-3.27.0 → webchanges-3.28.1}/webchanges.egg-info/requires.txt
@@ -7,13 +7,14 @@ markdown2
 msgpack
 platformdirs
 pyyaml
+zstandard
 
 [:sys_platform == "win32"]
 colorama
 tzdata
 
 [all]
-webchanges[beautify,bs4,deepdiff_xml,html5lib,ical2text,imagediff,jq,matrix,ocr,pdf2text,pushbullet,pushover,pypdf_crypto,redis,requests,safe_password,use_browser,xmpp,zstd]
+webchanges[beautify,bs4,deepdiff_xml,html5lib,ical2text,imagediff,jq,matrix,ocr,pdf2text,pushbullet,pushover,pypdf_crypto,redis,requests,safe_password,use_browser,xmpp]
 
 [beautify]
 beautifulsoup4
@@ -82,6 +83,3 @@ psutil
 
 [xmpp]
 aioxmpp
-
-[zstd]
-zstandard