webchanges 3.27.0.tar.gz → 3.28.1.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. {webchanges-3.27.0/webchanges.egg-info → webchanges-3.28.1}/PKG-INFO +3 -4
  2. {webchanges-3.27.0 → webchanges-3.28.1}/pyproject.toml +1 -2
  3. {webchanges-3.27.0 → webchanges-3.28.1}/requirements.txt +1 -0
  4. {webchanges-3.27.0 → webchanges-3.28.1}/webchanges/__init__.py +1 -1
  5. {webchanges-3.27.0 → webchanges-3.28.1}/webchanges/command.py +4 -4
  6. {webchanges-3.27.0 → webchanges-3.28.1}/webchanges/config.py +2 -2
  7. {webchanges-3.27.0 → webchanges-3.28.1}/webchanges/differs.py +38 -13
  8. {webchanges-3.27.0 → webchanges-3.28.1}/webchanges/handler.py +12 -5
  9. {webchanges-3.27.0 → webchanges-3.28.1}/webchanges/jobs.py +95 -66
  10. {webchanges-3.27.0 → webchanges-3.28.1}/webchanges/reporters.py +6 -6
  11. {webchanges-3.27.0 → webchanges-3.28.1}/webchanges/storage.py +36 -11
  12. {webchanges-3.27.0 → webchanges-3.28.1}/webchanges/worker.py +3 -3
  13. {webchanges-3.27.0 → webchanges-3.28.1/webchanges.egg-info}/PKG-INFO +3 -4
  14. {webchanges-3.27.0 → webchanges-3.28.1}/webchanges.egg-info/requires.txt +2 -4
  15. {webchanges-3.27.0 → webchanges-3.28.1}/LICENSE +0 -0
  16. {webchanges-3.27.0 → webchanges-3.28.1}/MANIFEST.in +0 -0
  17. {webchanges-3.27.0 → webchanges-3.28.1}/README.rst +0 -0
  18. {webchanges-3.27.0 → webchanges-3.28.1}/setup.cfg +0 -0
  19. {webchanges-3.27.0 → webchanges-3.28.1}/webchanges/_vendored/__init__.py +0 -0
  20. {webchanges-3.27.0 → webchanges-3.28.1}/webchanges/_vendored/headers.py +0 -0
  21. {webchanges-3.27.0 → webchanges-3.28.1}/webchanges/_vendored/packaging_version.py +0 -0
  22. {webchanges-3.27.0 → webchanges-3.28.1}/webchanges/cli.py +0 -0
  23. {webchanges-3.27.0 → webchanges-3.28.1}/webchanges/filters.py +0 -0
  24. {webchanges-3.27.0 → webchanges-3.28.1}/webchanges/mailer.py +0 -0
  25. {webchanges-3.27.0 → webchanges-3.28.1}/webchanges/main.py +0 -0
  26. {webchanges-3.27.0 → webchanges-3.28.1}/webchanges/py.typed +0 -0
  27. {webchanges-3.27.0 → webchanges-3.28.1}/webchanges/storage_minidb.py +0 -0
  28. {webchanges-3.27.0 → webchanges-3.28.1}/webchanges/util.py +0 -0
  29. {webchanges-3.27.0 → webchanges-3.28.1}/webchanges.egg-info/SOURCES.txt +0 -0
  30. {webchanges-3.27.0 → webchanges-3.28.1}/webchanges.egg-info/dependency_links.txt +0 -0
  31. {webchanges-3.27.0 → webchanges-3.28.1}/webchanges.egg-info/entry_points.txt +0 -0
  32. {webchanges-3.27.0 → webchanges-3.28.1}/webchanges.egg-info/top_level.txt +0 -0
{webchanges-3.27.0/webchanges.egg-info → webchanges-3.28.1}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: webchanges
-Version: 3.27.0
+Version: 3.28.1
 Summary: Web Changes Delivered. AI-Summarized. Totally Anonymous.
 Author-email: Mike Borsetti <mike+webchanges@borsetti.com>
 Maintainer-email: Mike Borsetti <mike+webchanges@borsetti.com>
@@ -114,6 +114,7 @@ Requires-Dist: msgpack
 Requires-Dist: platformdirs
 Requires-Dist: pyyaml
 Requires-Dist: tzdata; sys_platform == "win32"
+Requires-Dist: zstandard
 Provides-Extra: use-browser
 Requires-Dist: playwright; extra == "use-browser"
 Requires-Dist: psutil; extra == "use-browser"
@@ -160,10 +161,8 @@ Provides-Extra: requests
 Requires-Dist: requests; extra == "requests"
 Provides-Extra: safe-password
 Requires-Dist: keyring; extra == "safe-password"
-Provides-Extra: zstd
-Requires-Dist: zstandard; extra == "zstd"
 Provides-Extra: all
-Requires-Dist: webchanges[beautify,bs4,deepdiff_xml,html5lib,ical2text,imagediff,jq,matrix,ocr,pdf2text,pushbullet,pushover,pypdf_crypto,redis,requests,safe_password,use_browser,xmpp,zstd]; extra == "all"
+Requires-Dist: webchanges[beautify,bs4,deepdiff_xml,html5lib,ical2text,imagediff,jq,matrix,ocr,pdf2text,pushbullet,pushover,pypdf_crypto,redis,requests,safe_password,use_browser,xmpp]; extra == "all"
 
 .. role:: underline
    :class: underline
{webchanges-3.27.0 → webchanges-3.28.1}/pyproject.toml
@@ -90,9 +90,8 @@ xmpp = ['aioxmpp']
 redis = ['redis']
 requests = ['requests']
 safe_password = ['keyring']
-zstd = ['zstandard']
 all = [
-    'webchanges[use_browser,beautify,bs4,html5lib,ical2text,jq,ocr,pdf2text,pypdf_crypto,deepdiff_xml,imagediff,matrix,pushbullet,pushover,xmpp,redis,requests,safe_password,zstd]'
+    'webchanges[use_browser,beautify,bs4,html5lib,ical2text,jq,ocr,pdf2text,pypdf_crypto,deepdiff_xml,imagediff,matrix,pushbullet,pushover,xmpp,redis,requests,safe_password]'
 ]
 
 
{webchanges-3.27.0 → webchanges-3.28.1}/requirements.txt
@@ -9,3 +9,4 @@ msgpack
 platformdirs
 pyyaml
 tzdata; sys_platform == "win32"
+zstandard
{webchanges-3.27.0 → webchanges-3.28.1}/webchanges/__init__.py
@@ -22,7 +22,7 @@ __project_name__ = __package__
 # * MINOR version when you add functionality in a backwards compatible manner, and
 # * MICRO or PATCH version when you make backwards compatible bug fixes. We no longer use '0'
 # If unsure on increments, use pkg_resources.parse_version to parse
-__version__ = '3.27.0'
+__version__ = '3.28.1'
 __description__ = (
     'Check web (or command output) for changes since last run and notify.\n'
     '\n'
{webchanges-3.27.0 → webchanges-3.28.1}/webchanges/command.py
@@ -482,16 +482,16 @@ class UrlwatchCommand:
         """
         Runs jobs that have no history to populate the snapshot database when they're newly added.
         """
-        new_jobs = []
+        new_jobs = set()
         for idx, job in enumerate(self.urlwatcher.jobs):
             has_history = bool(self.urlwatcher.ssdb_storage.get_history_snapshots(job.get_guid()))
             if not has_history:
                 print(f'Adding new {job.get_indexed_location()}')
-                new_jobs.append(idx + 1)
-        if not new_jobs:
+                new_jobs.add(idx + 1)
+        if not new_jobs and not self.urlwatch_config.joblist:
             print('Found no new jobs to run.')
             return
-        self.urlwatcher.urlwatch_config.joblist = new_jobs
+        self.urlwatcher.urlwatch_config.joblist = set(self.urlwatcher.urlwatch_config.joblist).union(new_jobs)
         self.urlwatcher.run_jobs()
         self.urlwatcher.close()
         return
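
Note: switching new_jobs from a list to a set lets the "run newly added jobs" pass be combined with jobs explicitly selected on the command line instead of overwriting them. A minimal sketch of the new selection semantics (hypothetical job index numbers, illustrative only):

    new_jobs = {2, 5}  # jobs found with no stored snapshot history
    joblist = {3}      # jobs the user explicitly requested
    joblist = set(joblist).union(new_jobs)  # -> {2, 3, 5}; all three are run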
{webchanges-3.27.0 → webchanges-3.28.1}/webchanges/config.py
@@ -11,7 +11,7 @@ import textwrap
 # import os
 from dataclasses import dataclass, field
 from pathlib import Path
-from typing import Sequence
+from typing import Collection
 
 from webchanges import __doc__ as doc
 from webchanges import __docs_url__, __project_name__, __version__
@@ -51,7 +51,7 @@ class CommandConfig(BaseConfig):
     gc_database: int | None
     hooks_files: list[Path]
     install_chrome: bool
-    joblist: Sequence[str | int]
+    joblist: Collection[str | int]
     jobs_files: list[Path]
     list_jobs: bool | str | None
     log_file: Path
{webchanges-3.27.0 → webchanges-3.28.1}/webchanges/differs.py
@@ -71,6 +71,7 @@ except ImportError as e:  # pragma: no cover
 # https://stackoverflow.com/questions/39740632
 if TYPE_CHECKING:
     from webchanges.handler import JobState
+    from webchanges.storage import _Config
 
 
 logger = logging.getLogger(__name__)
@@ -123,7 +124,7 @@ class DifferBase(metaclass=TrackSubClasses):
         """
         result: list[str] = []
         for sc in TrackSubClasses.sorted_by_kind(cls):
-            # default_subdirective = getattr(sc, '__default_subdirective__', None)
+            # default_directive = getattr(sc, '__default_directive__', None)
             result.extend((f' * {sc.__kind__} - {sc.__doc__}',))
             if hasattr(sc, '__supported_directives__'):
                 for key, doc in sc.__supported_directives__.items():
@@ -136,30 +137,56 @@ class DifferBase(metaclass=TrackSubClasses):
         cls,
         differ_spec: dict[str, Any] | None,
         job_index_number: int | None = None,
+        config: _Config | None = None,
     ) -> tuple[str, dict[str, Any]]:
         """Checks the differ_spec for its validity and applies default values.
 
         :param differ_spec: The differ as entered by the user; use "unified" if empty.
         :param job_index_number: The job index number.
-        :returns: A validated differ_kind, subdirectives (where subdirectives is a dict).
+        :returns: A validated differ_kind, directives tuple.
         """
+
+        def directives_with_defaults(
+            differ_kind: str, directives: dict[str, Any], config: _Config | None = None
+        ) -> dict[str, Any]:
+            """Obtain differ directives that also contain defaults from the configuration.
+
+            :param differ_kind: The differ kind.
+            :param directives: The differ directives as stated in the job.
+            :returns: directives inclusive of configuration defaults.
+            """
+            if config is None:
+                logger.error('Cannot merge differ directives with defaults as no config object was passed')
+                return directives
+            cfg = config.get('differ_defaults')
+            if isinstance(cfg, dict):
+                defaults: dict[str, Any] = cfg.get(differ_kind)  # type: ignore[assignment]
+                if defaults:
+                    for key, value in defaults.items():
+                        if key not in directives:
+                            directives[key] = value
+            return directives
+
         differ_spec = differ_spec or {'name': 'unified'}
-        subdirectives = differ_spec.copy()
-        differ_kind = subdirectives.pop('name', '')
+        directives = differ_spec.copy()
+        differ_kind = directives.pop('name', '')
         if not differ_kind:
-            if list(subdirectives.keys()) == ['command']:
+            if list(directives.keys()) == ['command']:
                 differ_kind = 'command'
             else:
                 raise ValueError(
                     f"Job {job_index_number}: Differ directive must have a 'name' sub-directive: {differ_spec}."
                 )
 
-        differcls = cls.__subclasses__.get(differ_kind, None)
+        differcls: DifferBase | None = cls.__subclasses__.get(differ_kind, None)  # type: ignore[assignment]
         if not differcls:
             raise ValueError(f'Job {job_index_number}: No differ named {differ_kind}.')
 
+        if directives:
+            directives = directives_with_defaults(differ_kind, directives, config)
+
         if hasattr(differcls, '__supported_directives__'):
-            provided_keys = set(subdirectives.keys())
+            provided_keys = set(directives.keys())
             allowed_keys = set(differcls.__supported_directives__.keys())
             unknown_keys = provided_keys.difference(allowed_keys)
             if unknown_keys and '<any>' not in allowed_keys:
@@ -168,7 +195,7 @@ class DifferBase(metaclass=TrackSubClasses):
                     f"{', '.join(unknown_keys)} (supported: {', '.join(sorted(allowed_keys))})."
                 )
 
-        return differ_kind, subdirectives
+        return differ_kind, directives
 
     @classmethod
     def process(
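
Note: the new config argument feeds per-differ defaults from the configuration file into each job's differ directives; job-level values always win. A minimal sketch of the merge rule that directives_with_defaults applies (hypothetical values, illustrative only):

    config = {'differ_defaults': {'ai_google': {'model': 'gemini-2.0-flash'}}}
    directives = {'timeout': 60}  # as stated in the job
    defaults = config['differ_defaults'].get('ai_google') or {}
    for key, value in defaults.items():
        if key not in directives:  # a job directive is never overwritten
            directives[key] = value
    # directives == {'timeout': 60, 'model': 'gemini-2.0-flash'}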
@@ -1246,9 +1273,7 @@ class AIGoogleDiffer(DifferBase):
     __kind__ = 'ai_google'
 
     __supported_directives__ = {
-        'model': (
-            'model name from https://ai.google.dev/gemini-api/docs/models/gemini (default: gemini-1.5-flash-latest)'
-        ),
+        'model': ('model name from https://ai.google.dev/gemini-api/docs/models/gemini (default: gemini-2.0-flash)'),
         'system_instructions': (
             'Optional tone and style instructions for the model (default: see documentation at'
             'https://webchanges.readthedocs.io/en/stable/differs.html#ai-google-diff)'
@@ -1264,7 +1289,7 @@ class AIGoogleDiffer(DifferBase):
         'tools': "data passed on to the API's 'tools' field (default: None)",
         'unified': 'directives passed to the unified differ (default: None)',
     }
-    __default_subdirective__ = 'model'
+    __default_directive__ = 'model'
 
     @staticmethod
     def _send_to_model(
@@ -1279,7 +1304,7 @@ class AIGoogleDiffer(DifferBase):
         if directives is None:
             directives = {}
         if 'model' not in directives:
-            directives['model'] = 'gemini-1.5-pro'  # also for footer
+            directives['model'] = 'gemini-2.0-flash'  # also for footer
         model = directives.get('model')
         timeout = directives.get('timeout', 300)
         max_output_tokens = directives.get('max_output_tokens')
{webchanges-3.27.0 → webchanges-3.28.1}/webchanges/handler.py
@@ -238,7 +238,7 @@ class JobState(ContextManager):
         filtered_data, mime_type = FilterBase.auto_process(self, data, mime_type)
 
         # Apply any specified filters
-        for filter_kind, subfilter in FilterBase.normalize_filter_list(self.job.filter, self.job.index_number):
+        for filter_kind, subfilter in FilterBase.normalize_filter_list(self.job.filters, self.job.index_number):
             filtered_data, mime_type = FilterBase.process(
                 filter_kind, subfilter, self, filtered_data, mime_type
             )
@@ -293,6 +293,7 @@ class JobState(ContextManager):
         report_kind: Literal['text', 'markdown', 'html'] = 'text',
         differ: dict[str, Any] | None = None,
         tz: ZoneInfo | None = None,
+        config: _Config | None = None,
     ) -> str:
         """Generates the job's diff and applies diff_filters to it (if any). Memoized.
 
@@ -305,14 +306,20 @@ class JobState(ContextManager):
             return self.generated_diff[report_kind]
 
         if report_kind not in self.unfiltered_diff:
-            differ_kind, subdiffer = DifferBase.normalize_differ(differ or self.job.differ, self.job.index_number)
+            differ_kind, subdiffer = DifferBase.normalize_differ(
+                differ or self.job.differ,
+                self.job.index_number,
+                config,
+            )
             unfiltered_diff = DifferBase.process(differ_kind, subdiffer, self, report_kind, tz, self.unfiltered_diff)
             self.unfiltered_diff.update(unfiltered_diff)
         _generated_diff = self.unfiltered_diff[report_kind]
         if _generated_diff:
             # Apply any specified diff_filters
             _mime_type = 'text/plain'
-            for filter_kind, subfilter in FilterBase.normalize_filter_list(self.job.diff_filter, self.job.index_number):
+            for filter_kind, subfilter in FilterBase.normalize_filter_list(
+                self.job.diff_filters, self.job.index_number
+            ):
                 _generated_diff, _mime_type = FilterBase.process(  # type: ignore[assignment]
                     filter_kind, subfilter, self, _generated_diff, _mime_type
                 )
@@ -363,7 +370,7 @@ class Report:
 
         :param job_state: The JobState object with the information of the job run.
         """
-        if job_state.exception is not None and job_state.exception is not NotModifiedError:
+        if job_state.exception is not None and not isinstance(job_state.exception, NotModifiedError):
             logger.info(
                 f'Job {job_state.job.index_number}: Got exception while processing job {job_state.job}',
                 exc_info=job_state.exception,
@@ -460,7 +467,7 @@ class Report:
         if (
             job_state.verb == 'changed'
             and not self.config['display']['empty-diff']
-            and job_state.get_diff(tz=self.tz) == ''
+            and job_state.get_diff(tz=self.tz, config=self.config) == ''
         ):
             return True

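Note: the isinstance() change fixes a real bug: the old code compared an exception instance to the exception class with `is not`, which is always true, so NotModifiedError was logged like any other exception. An illustration (assuming NotModifiedError can be instantiated without arguments):

    exc = NotModifiedError()
    exc is not NotModifiedError            # True  -> old check always logged
    not isinstance(exc, NotModifiedError)  # False -> new check skips logging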
{webchanges-3.27.0 → webchanges-3.28.1}/webchanges/jobs.py
@@ -130,7 +130,6 @@ class JobBase(metaclass=TrackSubClasses):
     _delay: float | None = None
     additions_only: bool | None = None
     block_elements: list[str] | None = None
-    chromium_revision: dict[str, int] | dict[str, str] | str | int | None = None  # deprecated
     compared_versions: int | None = None
     contextlines: int | None = None
     cookies: dict[str, str] | None = None
@@ -138,15 +137,13 @@ class JobBase(metaclass=TrackSubClasses):
     data_as_json: bool | None = None
     deletions_only: bool | None = None
     differ: dict[str, Any] | None = None  # added in 3.21
-    diff_filter: str | list[str | dict[str, Any]] | None = None
+    diff_filters: str | list[str | dict[str, Any]] | None = None
     diff_tool: str | None = None  # deprecated in 3.21
     enabled: bool | None = None
     encoding: str | None = None
-    filter: str | list[str | dict[str, Any]] | None = None
+    filters: str | list[str | dict[str, Any]] | None = None
     headers = Headers(encoding='utf-8')
     http_client: Literal['httpx', 'requests'] | None = None
-    http_proxy: str | None = None
-    https_proxy: str | None = None
     ignore_cached: bool | None = None
     ignore_connection_errors: bool | None = None
     ignore_default_args: bool | str | list[str] | None = None
@@ -172,6 +169,7 @@ class JobBase(metaclass=TrackSubClasses):
     no_redirects: bool | None = None
     note: str | None = None
     params: str | list | dict | None = None
+    proxy: str | None = None
     referer: str | None = None  # Playwright
     retries: int | None = None
     ssl_no_verify: bool | None = None
@@ -191,6 +189,23 @@ class JobBase(metaclass=TrackSubClasses):
     wait_until: Literal['commit', 'domcontentloaded', 'load', 'networkidle'] | None = None
 
     def __init__(self, **kwargs: Any) -> None:
+        # backward-compatibility
+        if 'filter' in kwargs:
+            logger.info(f"Job {kwargs.get('index_number')}: Replacing deprecated directive 'filter' with 'filters'")
+            kwargs['filters'] = kwargs.pop('filter')
+        if 'diff_filter' in kwargs:
+            logger.info(
+                f"Job {kwargs.get('index_number')}: Replacing deprecated directive 'diff_filter' with 'diff_filters'"
+            )
+            kwargs['diff_filters'] = kwargs.pop('diff_filter')
+        if 'https_proxy' in kwargs:
+            logger.info(f"Job {kwargs.get('index_number')}: Replacing deprecated directive 'https_proxy' with 'proxy'")
+            kwargs['proxy'] = kwargs.pop('https_proxy')
+            kwargs.pop('http_proxy', None)
+        elif 'http_proxy' in kwargs:
+            logger.info(f"Job {kwargs.get('index_number')}: Replacing deprecated directive 'http_proxy' with 'proxy'")
+            kwargs['proxy'] = kwargs.pop('http_proxy')
+
         # Fail if any required keys are not provided
         for k in self.__required__:
             if k not in kwargs:
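
Note: the renamed directives are normalized transparently at construction time, so existing jobs files keep working. An illustrative sketch of the normalization (hypothetical kwargs):

    kwargs = {'url': 'https://example.net/', 'filter': [{'css': 'div#main'}]}
    if 'filter' in kwargs:
        kwargs['filters'] = kwargs.pop('filter')
    # kwargs == {'url': 'https://example.net/', 'filters': [{'css': 'div#main'}]}

The same pattern maps diff_filter to diff_filters, and http_proxy/https_proxy to the single proxy directive (https_proxy wins when both are present).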
@@ -348,7 +363,7 @@ class JobBase(metaclass=TrackSubClasses):
         return job
 
     def to_dict(self) -> dict:
-        """Return all definte (not None) Job object directives, required and optional, as a serializable dict,
+        """Return all defined (not None) Job object directives, required and optional, as a serializable dict,
         converting Headers object (which are not JSON serializable) to dicts.
 
         :returns: A dict with all job directives as keys, ignoring those that are extras.
@@ -369,7 +384,8 @@ class JobBase(metaclass=TrackSubClasses):
         :returns: A JobBase type object.
         """
         for k in data.keys():
-            if k not in cls.__required__ + cls.__optional__:
+            # backward-compatibility
+            if k not in cls.__required__ + cls.__optional__ + ('filter', 'diff_filter', 'http_client', 'http_proxy'):
                 if len(filenames) > 1:
                     jobs_files = ['in the concatenation of the jobs files:'] + [f'• {file},' for file in filenames]
                 elif len(filenames) == 1:
@@ -535,6 +551,22 @@ class JobBase(metaclass=TrackSubClasses):
         if self.monospace is None:
             self.monospace = True
 
+    def get_proxy(self) -> str | None:
+        """Check that URL is http or https and return proxy value."""
+        scheme = urlsplit(self.url).scheme
+        if scheme not in {'http', 'https'}:
+            raise ValueError(
+                f'Job {self.index_number}: URL should start with https:// or http:// (check for typos): {self.url}'
+            )
+        proxy = self.proxy
+        if proxy is None:
+            if os.getenv((scheme + '_proxy').upper()):
+                proxy = os.getenv((scheme + '_proxy').upper())
+                logger.debug(
+                    f"Job {self.index_number}: Setting proxy from environment variable {(scheme + '_proxy').upper()}"
+                )
+        return proxy
+
 
 class Job(JobBase):
     """Job class for jobs."""
@@ -546,10 +578,10 @@ class Job(JobBase):
         'contextlines',
         'deletions_only',
         'differ',
-        'diff_filter',
+        'diff_filters',
         'diff_tool',  # deprecated in 3.21
         'enabled',
-        'filter',
+        'filters',
         'index_number',
         'is_markdown',
         'kind',  # hooks.py
@@ -672,14 +704,12 @@ class UrlJob(UrlJobBase):
         'data_as_json',
         'encoding',
         'headers',
-        'http_client',
-        'http_proxy',
-        'https_proxy',
         'ignore_cached',
         'ignore_dh_key_too_small',
         'method',
         'no_redirects',
         'params',
+        'proxy',
         'retries',
         'ssl_no_verify',
         'timeout',
@@ -716,17 +746,10 @@ class UrlJob(UrlJobBase):
                 f'Job {self.index_number}: Using the HTTPX HTTP client library (HTTP/2 support is not available since '
                 f'h2 is not installed)'
             )
-        proxy: str | None = None
-        scheme = urlsplit(self.url).scheme
-        if scheme not in {'http', 'https'}:
-            raise ValueError(
-                f'Job {self.index_number}: URL should start with https:// or http:// (check for typos): {self.url}'
-            )
-        if getattr(self, scheme + '_proxy'):
-            proxy = getattr(self, scheme + '_proxy')
-        elif os.getenv((scheme + '_proxy').upper()):
-            proxy = os.getenv((scheme + '_proxy').upper())
-        logger.debug(f'Job {self.index_number}: Proxies: {proxy}')
+
+        proxy = self.get_proxy()
+        if proxy is not None:
+            logger.debug(f'Job {self.index_number}: Proxy: {proxy}')
 
         if self.ignore_dh_key_too_small:
             logger.debug(
@@ -795,7 +818,7 @@ class UrlJob(UrlJobBase):
         etag = response.headers.get('ETag', '')
         mime_type = response.headers.get('Content-Type', '').split(';')[0]
 
-        if FilterBase.filter_chain_needs_bytes(self.filter):
+        if FilterBase.filter_chain_needs_bytes(self.filters):
             return response.content, etag, mime_type
 
         if self.encoding:
@@ -812,13 +835,13 @@ class UrlJob(UrlJobBase):
         :raises NotModifiedError: If an HTTP 304 response is received.
         """
         logger.info(f'Job {self.index_number}: Using the requests HTTP client library')
-        proxies = None
-        scheme = urlsplit(self.url).scheme
-        if getattr(self, scheme + '_proxy'):
-            proxies = {scheme: getattr(self, scheme + '_proxy')}
-        elif os.getenv((scheme + '_proxy').upper()):
-            proxies = {scheme: os.getenv((scheme + '_proxy').upper())}
-        logger.debug(f'Job {self.index_number}: Proxies: {proxies}')
+        proxy_str = self.get_proxy()
+        if proxy_str is not None:
+            scheme = urlsplit(self.url).scheme
+            proxies = {scheme: proxy_str}
+            logger.debug(f'Job {self.index_number}: Proxies: {proxies}')
+        else:
+            proxies = None
 
         if self.ssl_no_verify:
             # required to suppress warnings with 'ssl_no_verify: true'
@@ -901,7 +924,7 @@ class UrlJob(UrlJobBase):
         etag = response.headers.get('ETag', '')
         mime_type = response.headers.get('Content-Type', '').split(';')[0]
 
-        if FilterBase.filter_chain_needs_bytes(self.filter):
+        if FilterBase.filter_chain_needs_bytes(self.filters):
             return response.content, etag, mime_type
 
         if self.encoding:
@@ -939,7 +962,7 @@ class UrlJob(UrlJobBase):
         else:
             filename = Path(str(urlparse(self.url).path))
 
-        if FilterBase.filter_chain_needs_bytes(self.filter):
+        if FilterBase.filter_chain_needs_bytes(self.filters):
             return filename.read_bytes(), '', 'application/octet-stream'
         else:
             return filename.read_text(), '', 'text/plain'
@@ -955,7 +978,7 @@ class UrlJob(UrlJobBase):
             str(password),
             timeout=self.timeout,
         ) as ftp:
-            if FilterBase.filter_chain_needs_bytes(self.filter):
+            if FilterBase.filter_chain_needs_bytes(self.filters):
                 data_bytes = b''
 
                 def callback_bytes(dt: bytes) -> None:
@@ -1020,14 +1043,27 @@ class UrlJob(UrlJobBase):
         logger.debug(f'Job {self.index_number}: Cookies: {self.cookies}')
 
         if self.http_client == 'requests' or not httpx:
+            if isinstance(requests, str):
+                message = f'Job {job_state.job.index_number} cannot be run '
+                if self.http_client == 'requests':
+                    message += "with 'http_client: requests' "
+                message += (
+                    f'( {self.get_indexed_location()} ):\n{requests}\n'
+                    f"Please install module using e.g. 'pip install --upgrade webchanges[requests]'."
+                )
+                raise ImportError(message)
             job_state._http_client_used = 'requests'
             data, etag, mime_type = self._retrieve_requests(headers=headers, timeout=timeout)
         elif not self.http_client or self.http_client == 'httpx':
             if isinstance(httpx, str):
-                raise ImportError(
-                    f"Job {job_state.job.index_number}: Python HTTP client package 'httpx' cannot be imported; cannot "
-                    f'run job ( {self.get_indexed_location()} )\n{httpx}'
+                message = f'Job {job_state.job.index_number} cannot be run '
+                if self.http_client == 'httpx':
+                    message += "with 'http_client: httpx' "
+                message += (
+                    f'( {self.get_indexed_location()} ):\n{httpx}\n'
+                    f"Please install module using e.g. 'pip install --upgrade httpx[http2,zstd]'."
                 )
+                raise ImportError(message)
             job_state._http_client_used = 'HTTPX'
             data, etag, mime_type = self._retrieve_httpx(headers=headers, timeout=timeout)
         else:
@@ -1070,7 +1106,7 @@ class UrlJob(UrlJobBase):
         # Instead of a full traceback, just show the error
         exception_str = str(exception).strip()
         print(f'{exception_str=} {exception.args=} {type(exception)=}')
-        if (self.https_proxy or self.http_proxy) and (
+        if self.proxy and (
             (httpx and isinstance(exception, httpx.TransportError))
             or any(
                 exception_str.startswith(error_string)
@@ -1096,7 +1132,7 @@ class UrlJob(UrlJobBase):
                 )
             )
         ):
-            exception_str += f'\n\n(Job has proxy {self.https_proxy or self.http_proxy})'
+            exception_str += f'\n\n(Job has proxy {self.proxy})'
             return exception_str
         return tb
 
@@ -1160,13 +1196,10 @@ class BrowserJob(UrlJobBase):
     __required__: tuple[str, ...] = ('use_browser',)
     __optional__: tuple[str, ...] = (
         'block_elements',
-        'chromium_revision',  # deprecated
         'cookies',
         'data',
         'data_as_json',
         'headers',
-        'http_proxy',
-        'https_proxy',
         'ignore_default_args',  # Playwright
         'ignore_https_errors',
         'init_script',  # Playwright,
@@ -1175,6 +1208,7 @@ class BrowserJob(UrlJobBase):
         'method',
         'navigate',
         'params',
+        'proxy',
         'switches',
         'timeout',
         'user_data_dir',
@@ -1225,7 +1259,7 @@ class BrowserJob(UrlJobBase):
         try:
             from playwright._repo_version import version as playwright_version
             from playwright.sync_api import Error as PlaywrightError
-            from playwright.sync_api import ProxySettings, Route, sync_playwright
+            from playwright.sync_api import Route, sync_playwright
         except ImportError:  # pragma: no cover
             raise ImportError(
                 f"Python package 'playwright' is not installed; cannot run jobs with the 'use_browser: true' "
@@ -1272,26 +1306,21 @@ class BrowserJob(UrlJobBase):
 
         headers = self.get_headers(job_state, user_agent=None)
 
-        proxy: ProxySettings | None = None
-        if self.http_proxy or os.getenv('HTTP_PROXY') or self.https_proxy or os.getenv('HTTPS_PROXY'):
-            if urlsplit(self.url).scheme == 'http':
-                proxy_split: SplitResult | SplitResultBytes | None = urlsplit(
-                    self.http_proxy or os.getenv('HTTP_PROXY')
-                )
-            elif urlsplit(self.url).scheme == 'https':
-                proxy_split = urlsplit(self.https_proxy or os.getenv('HTTPS_PROXY'))
-            else:
-                proxy_split = None
-            if proxy_split:
-                proxy = {
-                    'server': (
-                        f'{proxy_split.scheme!s}://{proxy_split.hostname!s}:{proxy_split.port!s}'
-                        if proxy_split.port
-                        else ''
-                    ),
-                    'username': str(proxy_split.username),
-                    'password': str(proxy_split.password),
-                }
+        proxy_str = self.get_proxy()
+        if proxy_str is not None:
+            proxy_split: SplitResult | SplitResultBytes = urlsplit(proxy_str)
+            proxy = {
+                'server': (
+                    f'{proxy_split.scheme!s}://{proxy_split.hostname!s}:{proxy_split.port!s}'
+                    if proxy_split.port
+                    else ''
+                ),
+                'username': str(proxy_split.username),
+                'password': str(proxy_split.password),
+            }
+            logger.debug(f'Job {self.index_number}: Proxy: {proxy}')
+        else:
+            proxy = None
 
         if self.switches:
             if isinstance(self.switches, str):
@@ -1784,8 +1813,8 @@ class BrowserJob(UrlJobBase):
         """
         exception_str = f'Browser error in {str(exception).strip()}'
         print(f'{exception_str=}, {tb=}')
-        if (self.https_proxy or self.http_proxy) and 'net::ERR' in exception_str:
-            exception_str += f'\n\n(Job has proxy {self.https_proxy or self.http_proxy})'
+        if self.proxy and 'net::ERR' in exception_str:
+            exception_str += f'\n\n(Job has proxy {self.proxy})'
             return exception_str
         return exception_str
 
@@ -1936,7 +1965,7 @@ class ShellJob(Job):
         :raises subprocess.TimeoutExpired: Subclass of SubprocessError, raised when a timeout expires while waiting for
             a child process.
         """
-        needs_bytes = FilterBase.filter_chain_needs_bytes(self.filter)
+        needs_bytes = FilterBase.filter_chain_needs_bytes(self.filters)
         try:
             return (
                 subprocess.run(
{webchanges-3.27.0 → webchanges-3.28.1}/webchanges/reporters.py
@@ -466,7 +466,7 @@ class TextReporter(ReporterBase):
         location = job_state.job.get_location()
         if pretty_name != location:
             location = f'{pretty_name} ({location})'
-        yield ': '.join((job_state.verb.upper(), location))
+        yield ': '.join((job_state.verb.replace('_', ' ').upper(), location))
         if hasattr(job_state.job, 'note') and job_state.job.note:
             yield job_state.job.note
         return
@@ -549,8 +549,8 @@ class TextReporter(ReporterBase):
         location = job_state.job.get_location()
         if pretty_name != location:
             location = f'{pretty_name} ({location})'
-        pretty_summary = ': '.join((job_state.verb.upper(), pretty_name))
-        summary = ': '.join((job_state.verb.upper(), location))
+        pretty_summary = ': '.join((job_state.verb.replace('_', ' ').upper(), pretty_name))
+        summary = ': '.join((job_state.verb.replace('_', ' ').upper(), location))
         differ = job_state.job.differ or {}
         content = self._format_content(job_state, differ)
         # self._format_content may update verb to 'changed,no_report'
@@ -597,7 +597,7 @@ class MarkdownReporter(ReporterBase):
         location = job_state.job.get_location()
         if pretty_name != location:
             location = f'{pretty_name} ({location})'
-        yield f"* {': '.join((job_state.verb.upper(), location))}"
+        yield f"* {': '.join((job_state.verb.replace('_', ' ').upper(), location))}"
        if hasattr(job_state.job, 'note') and job_state.job.note:
             yield job_state.job.note
         return
@@ -812,8 +812,8 @@ class MarkdownReporter(ReporterBase):
         else:
             location = f'{pretty_name} ({location})'
 
-        pretty_summary = ': '.join((job_state.verb.upper(), pretty_name))
-        summary = ': '.join((job_state.verb.upper(), location))
+        pretty_summary = ': '.join((job_state.verb.replace('_', ' ').upper(), pretty_name))
+        summary = ': '.join((job_state.verb.replace('_', ' ').upper(), location))
         differ = job_state.job.differ or {}
         content = self._format_content(job_state, differ)  # may update verb to 'changed,no_report'
         if job_state.verb == 'changed,no_report':
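
Note: this reporter change is display-only: verbs containing underscores now render with spaces. For example (assuming a multi-word verb such as the one set by unchanged-from-error reporting):

    'unchanged_from_error'.replace('_', ' ').upper()  # 'UNCHANGED FROM ERROR'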
{webchanges-3.27.0 → webchanges-3.28.1}/webchanges/storage.py
@@ -287,6 +287,20 @@ _ConfigJobDefaults = TypedDict(
     },
     total=False,
 )
+_ConfigDifferDefaults = TypedDict(
+    '_ConfigDifferDefaults',
+    {
+        '_note': str,
+        'unified': dict[str, Any],
+        'ai_google': dict[str, Any],
+        'command': dict[str, Any],
+        'deepdiff': dict[str, Any],
+        'image': dict[str, Any],
+        'table': dict[str, Any],
+        'wdiff': dict[str, Any],
+    },
+    total=False,
+)
 _ConfigDatabase = TypedDict(
     '_ConfigDatabase',
     {
@@ -300,6 +314,7 @@ _Config = TypedDict(
         'display': _ConfigDisplay,
         'report': _ConfigReport,
         'job_defaults': _ConfigJobDefaults,
+        'differ_defaults': _ConfigDifferDefaults,
         'database': _ConfigDatabase,
         'footnote': str | None,
     },
@@ -447,6 +462,16 @@ DEFAULT_CONFIG: _Config = {
         'browser': {'_note': "These are used for 'url' jobs with 'use_browser: true'."},
         'command': {'_note': "These are used for 'command' jobs."},
     },
+    'differ_defaults': {
+        '_note': 'Default directives that are applied to individual differs.',
+        'unified': {},
+        'ai_google': {},
+        'command': {},
+        'deepdiff': {},
+        'image': {},
+        'table': {},
+        'wdiff': {},
+    },
     'database': {
         'engine': 'sqlite3',
         'max_snapshots': 4,
@@ -609,7 +634,7 @@ class JobsBaseFileStorage(BaseTextualFileStorage, ABC):
         if isinstance(job, ShellJob):
             return True
 
-        for filter_kind, subfilter in FilterBase.normalize_filter_list(job.filter, job.index_number):
+        for filter_kind, _ in FilterBase.normalize_filter_list(job.filters, job.index_number):
             if filter_kind == 'shellpipe':
                 return True
 
@@ -721,19 +746,17 @@ class YamlConfigStorage(BaseYamlFileStorage):
         :param config: The configuration.
         :raises ValueError: If the configuration has keys not in DEFAULT_CONFIG (bad keys, e.g. typos)
         """
-        for key in {'chromium_revision'}:
-            if key in config['job_defaults']['all'] or key in config['job_defaults']['browser']:
-                warnings.warn(
-                    f'Directive {key} found in the configuration file {self.filename} has been deprecated'
-                    f'with the use of Playright. Please delete it (webchanges --edit-config)',
-                    DeprecationWarning,
-                )
-
         config_for_extras = copy.deepcopy(config)
         if 'job_defaults' in config_for_extras:
             # Create missing 'job_defaults' keys from DEFAULT_CONFIG
             for key in DEFAULT_CONFIG['job_defaults']:
+                if 'job_defaults' not in config_for_extras:
+                    config_for_extras['job_defaults'] = {}
                 config_for_extras['job_defaults'][key] = None  # type: ignore[literal-required]
+            for key in DEFAULT_CONFIG['differ_defaults']:
+                if 'differ_defaults' not in config_for_extras:
+                    config_for_extras['differ_defaults'] = {}
+                config_for_extras['differ_defaults'][key] = None  # type: ignore[literal-required]
         if 'hooks' in sys.modules:
             # Remove extra keys in config used in hooks (they are not in DEFAULT_CONFIG)
             for _, obj in inspect.getmembers(
@@ -742,6 +765,8 @@ class YamlConfigStorage(BaseYamlFileStorage):
                 if issubclass(obj, JobBase):
                     if obj.__kind__ not in DEFAULT_CONFIG['job_defaults'].keys():
                         config_for_extras['job_defaults'].pop(obj.__kind__, None)  # type: ignore[misc]
+                    elif obj.__kind__ not in DEFAULT_CONFIG['job_defaults'].keys():
+                        config_for_extras['job_defaults'].pop(obj.__kind__, None)  # type: ignore[misc]
                 elif issubclass(obj, ReporterBase):
                     if obj.__kind__ not in DEFAULT_CONFIG['report'].keys():
                         config_for_extras['report'].pop(obj.__kind__, None)  # type: ignore[misc]
@@ -908,11 +933,11 @@ class YamlJobsStorage(BaseYamlFileStorage, JobsBaseFileStorage):
                     + job_files_for_error()
                 )
             )
-        if not isinstance(job.filter, (NoneType, list)):
+        if not isinstance(job.filters, (NoneType, list)):
             raise ValueError(
                 '\n '.join(
                     [
-                        f"The 'filter' key needs to contain a list; found a {type(job.filter).__name__} ",
+                        f"The 'filters' key needs to contain a list; found a {type(job.filters).__name__} ",
                         f'in {job.get_indexed_location()}',
                     ]
                     + job_files_for_error()
{webchanges-3.27.0 → webchanges-3.28.1}/webchanges/worker.py
@@ -108,7 +108,7 @@ def run_jobs(urlwatcher: Urlwatch) -> None:
         if job_state.tries > 0:
             job_state.tries = 0
             job_state.save()
-        if job_state.old_error_data:
+        if job_state.old_error_data and job_state.job.suppress_repeated_errors:
             urlwatcher.report.unchanged_from_error(job_state)
         else:
             urlwatcher.report.unchanged(job_state)
@@ -140,7 +140,7 @@ def run_jobs(urlwatcher: Urlwatch) -> None:
         if job_state.tries > 0:
             job_state.tries = 0
             job_state.save()
-        if job_state.old_error_data:
+        if job_state.old_error_data and job_state.job.suppress_repeated_errors:
             urlwatcher.report.unchanged_from_error(job_state)
         else:
             urlwatcher.report.unchanged(job_state)
@@ -195,7 +195,7 @@ def run_jobs(urlwatcher: Urlwatch) -> None:
     if urlwatcher.urlwatch_config.max_workers:
         max_workers = urlwatcher.urlwatch_config.max_workers
     else:
-        max_workers = max(int(virt_mem / 200e6), 1)
+        max_workers = max(int(virt_mem / 400e6), 1)
     max_workers = min(max_workers, os.cpu_count() or 1)
     logger.debug(
         f"Running jobs that require Chrome (i.e. with 'use_browser: true') in parallel with {max_workers} "
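
Note: the divisor change halves the default browser concurrency, budgeting roughly 400 MB of virtual memory per Chrome worker instead of 200 MB. A worked example (assuming 8 GB of free virtual memory and 8 CPUs):

    virt_mem = 8e9
    max_workers = max(int(virt_mem / 400e6), 1)  # 20
    max_workers = min(max_workers, 8)            # capped at os.cpu_count() -> 8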
{webchanges-3.27.0 → webchanges-3.28.1/webchanges.egg-info}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: webchanges
-Version: 3.27.0
+Version: 3.28.1
 Summary: Web Changes Delivered. AI-Summarized. Totally Anonymous.
 Author-email: Mike Borsetti <mike+webchanges@borsetti.com>
 Maintainer-email: Mike Borsetti <mike+webchanges@borsetti.com>
@@ -114,6 +114,7 @@ Requires-Dist: msgpack
 Requires-Dist: platformdirs
 Requires-Dist: pyyaml
 Requires-Dist: tzdata; sys_platform == "win32"
+Requires-Dist: zstandard
 Provides-Extra: use-browser
 Requires-Dist: playwright; extra == "use-browser"
 Requires-Dist: psutil; extra == "use-browser"
@@ -160,10 +161,8 @@ Provides-Extra: requests
 Requires-Dist: requests; extra == "requests"
 Provides-Extra: safe-password
 Requires-Dist: keyring; extra == "safe-password"
-Provides-Extra: zstd
-Requires-Dist: zstandard; extra == "zstd"
 Provides-Extra: all
-Requires-Dist: webchanges[beautify,bs4,deepdiff_xml,html5lib,ical2text,imagediff,jq,matrix,ocr,pdf2text,pushbullet,pushover,pypdf_crypto,redis,requests,safe_password,use_browser,xmpp,zstd]; extra == "all"
+Requires-Dist: webchanges[beautify,bs4,deepdiff_xml,html5lib,ical2text,imagediff,jq,matrix,ocr,pdf2text,pushbullet,pushover,pypdf_crypto,redis,requests,safe_password,use_browser,xmpp]; extra == "all"
 
 .. role:: underline
    :class: underline
{webchanges-3.27.0 → webchanges-3.28.1}/webchanges.egg-info/requires.txt
@@ -7,13 +7,14 @@ markdown2
 msgpack
 platformdirs
 pyyaml
+zstandard
 
 [:sys_platform == "win32"]
 colorama
 tzdata
 
 [all]
-webchanges[beautify,bs4,deepdiff_xml,html5lib,ical2text,imagediff,jq,matrix,ocr,pdf2text,pushbullet,pushover,pypdf_crypto,redis,requests,safe_password,use_browser,xmpp,zstd]
+webchanges[beautify,bs4,deepdiff_xml,html5lib,ical2text,imagediff,jq,matrix,ocr,pdf2text,pushbullet,pushover,pypdf_crypto,redis,requests,safe_password,use_browser,xmpp]
 
 [beautify]
 beautifulsoup4
@@ -82,6 +83,3 @@ psutil
 
 [xmpp]
 aioxmpp
-
-[zstd]
-zstandard