webchanges 3.27.0__tar.gz → 3.28.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {webchanges-3.27.0/webchanges.egg-info → webchanges-3.28.1}/PKG-INFO +3 -4
- {webchanges-3.27.0 → webchanges-3.28.1}/pyproject.toml +1 -2
- {webchanges-3.27.0 → webchanges-3.28.1}/requirements.txt +1 -0
- {webchanges-3.27.0 → webchanges-3.28.1}/webchanges/__init__.py +1 -1
- {webchanges-3.27.0 → webchanges-3.28.1}/webchanges/command.py +4 -4
- {webchanges-3.27.0 → webchanges-3.28.1}/webchanges/config.py +2 -2
- {webchanges-3.27.0 → webchanges-3.28.1}/webchanges/differs.py +38 -13
- {webchanges-3.27.0 → webchanges-3.28.1}/webchanges/handler.py +12 -5
- {webchanges-3.27.0 → webchanges-3.28.1}/webchanges/jobs.py +95 -66
- {webchanges-3.27.0 → webchanges-3.28.1}/webchanges/reporters.py +6 -6
- {webchanges-3.27.0 → webchanges-3.28.1}/webchanges/storage.py +36 -11
- {webchanges-3.27.0 → webchanges-3.28.1}/webchanges/worker.py +3 -3
- {webchanges-3.27.0 → webchanges-3.28.1/webchanges.egg-info}/PKG-INFO +3 -4
- {webchanges-3.27.0 → webchanges-3.28.1}/webchanges.egg-info/requires.txt +2 -4
- {webchanges-3.27.0 → webchanges-3.28.1}/LICENSE +0 -0
- {webchanges-3.27.0 → webchanges-3.28.1}/MANIFEST.in +0 -0
- {webchanges-3.27.0 → webchanges-3.28.1}/README.rst +0 -0
- {webchanges-3.27.0 → webchanges-3.28.1}/setup.cfg +0 -0
- {webchanges-3.27.0 → webchanges-3.28.1}/webchanges/_vendored/__init__.py +0 -0
- {webchanges-3.27.0 → webchanges-3.28.1}/webchanges/_vendored/headers.py +0 -0
- {webchanges-3.27.0 → webchanges-3.28.1}/webchanges/_vendored/packaging_version.py +0 -0
- {webchanges-3.27.0 → webchanges-3.28.1}/webchanges/cli.py +0 -0
- {webchanges-3.27.0 → webchanges-3.28.1}/webchanges/filters.py +0 -0
- {webchanges-3.27.0 → webchanges-3.28.1}/webchanges/mailer.py +0 -0
- {webchanges-3.27.0 → webchanges-3.28.1}/webchanges/main.py +0 -0
- {webchanges-3.27.0 → webchanges-3.28.1}/webchanges/py.typed +0 -0
- {webchanges-3.27.0 → webchanges-3.28.1}/webchanges/storage_minidb.py +0 -0
- {webchanges-3.27.0 → webchanges-3.28.1}/webchanges/util.py +0 -0
- {webchanges-3.27.0 → webchanges-3.28.1}/webchanges.egg-info/SOURCES.txt +0 -0
- {webchanges-3.27.0 → webchanges-3.28.1}/webchanges.egg-info/dependency_links.txt +0 -0
- {webchanges-3.27.0 → webchanges-3.28.1}/webchanges.egg-info/entry_points.txt +0 -0
- {webchanges-3.27.0 → webchanges-3.28.1}/webchanges.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: webchanges
|
|
3
|
-
Version: 3.27.0
|
|
3
|
+
Version: 3.28.1
|
|
4
4
|
Summary: Web Changes Delivered. AI-Summarized. Totally Anonymous.
|
|
5
5
|
Author-email: Mike Borsetti <mike+webchanges@borsetti.com>
|
|
6
6
|
Maintainer-email: Mike Borsetti <mike+webchanges@borsetti.com>
|
|
@@ -114,6 +114,7 @@ Requires-Dist: msgpack
|
|
|
114
114
|
Requires-Dist: platformdirs
|
|
115
115
|
Requires-Dist: pyyaml
|
|
116
116
|
Requires-Dist: tzdata; sys_platform == "win32"
|
|
117
|
+
Requires-Dist: zstandard
|
|
117
118
|
Provides-Extra: use-browser
|
|
118
119
|
Requires-Dist: playwright; extra == "use-browser"
|
|
119
120
|
Requires-Dist: psutil; extra == "use-browser"
|
|
@@ -160,10 +161,8 @@ Provides-Extra: requests
|
|
|
160
161
|
Requires-Dist: requests; extra == "requests"
|
|
161
162
|
Provides-Extra: safe-password
|
|
162
163
|
Requires-Dist: keyring; extra == "safe-password"
|
|
163
|
-
Provides-Extra: zstd
|
|
164
|
-
Requires-Dist: zstandard; extra == "zstd"
|
|
165
164
|
Provides-Extra: all
|
|
166
|
-
Requires-Dist: webchanges[beautify,bs4,deepdiff_xml,html5lib,ical2text,imagediff,jq,matrix,ocr,pdf2text,pushbullet,pushover,pypdf_crypto,redis,requests,safe_password,use_browser,xmpp,zstd]; extra == "all"
|
|
165
|
+
Requires-Dist: webchanges[beautify,bs4,deepdiff_xml,html5lib,ical2text,imagediff,jq,matrix,ocr,pdf2text,pushbullet,pushover,pypdf_crypto,redis,requests,safe_password,use_browser,xmpp]; extra == "all"
|
|
167
166
|
|
|
168
167
|
.. role:: underline
|
|
169
168
|
:class: underline
|
|
@@ -90,9 +90,8 @@ xmpp = ['aioxmpp']
|
|
|
90
90
|
redis = ['redis']
|
|
91
91
|
requests = ['requests']
|
|
92
92
|
safe_password = ['keyring']
|
|
93
|
-
zstd = ['zstandard']
|
|
94
93
|
all = [
|
|
95
|
-
'webchanges[use_browser,beautify,bs4,html5lib,ical2text,jq,ocr,pdf2text,pypdf_crypto,deepdiff_xml,imagediff,matrix,pushbullet,pushover,xmpp,redis,requests,safe_password,zstd]'
|
|
94
|
+
'webchanges[use_browser,beautify,bs4,html5lib,ical2text,jq,ocr,pdf2text,pypdf_crypto,deepdiff_xml,imagediff,matrix,pushbullet,pushover,xmpp,redis,requests,safe_password]'
|
|
96
95
|
]
|
|
97
96
|
|
|
98
97
|
|
|
@@ -22,7 +22,7 @@ __project_name__ = __package__
|
|
|
22
22
|
# * MINOR version when you add functionality in a backwards compatible manner, and
|
|
23
23
|
# * MICRO or PATCH version when you make backwards compatible bug fixes. We no longer use '0'
|
|
24
24
|
# If unsure on increments, use pkg_resources.parse_version to parse
|
|
25
|
-
__version__ = '3.27.0'
|
|
25
|
+
__version__ = '3.28.1'
|
|
26
26
|
__description__ = (
|
|
27
27
|
'Check web (or command output) for changes since last run and notify.\n'
|
|
28
28
|
'\n'
|
|
@@ -482,16 +482,16 @@ class UrlwatchCommand:
|
|
|
482
482
|
"""
|
|
483
483
|
Runs jobs that have no history to populate the snapshot database when they're newly added.
|
|
484
484
|
"""
|
|
485
|
-
new_jobs =
|
|
485
|
+
new_jobs = set()
|
|
486
486
|
for idx, job in enumerate(self.urlwatcher.jobs):
|
|
487
487
|
has_history = bool(self.urlwatcher.ssdb_storage.get_history_snapshots(job.get_guid()))
|
|
488
488
|
if not has_history:
|
|
489
489
|
print(f'Adding new {job.get_indexed_location()}')
|
|
490
|
-
new_jobs.
|
|
491
|
-
if not new_jobs:
|
|
490
|
+
new_jobs.add(idx + 1)
|
|
491
|
+
if not new_jobs and not self.urlwatch_config.joblist:
|
|
492
492
|
print('Found no new jobs to run.')
|
|
493
493
|
return
|
|
494
|
-
self.urlwatcher.urlwatch_config.joblist = new_jobs
|
|
494
|
+
self.urlwatcher.urlwatch_config.joblist = set(self.urlwatcher.urlwatch_config.joblist).union(new_jobs)
|
|
495
495
|
self.urlwatcher.run_jobs()
|
|
496
496
|
self.urlwatcher.close()
|
|
497
497
|
return
|
|
@@ -11,7 +11,7 @@ import textwrap
|
|
|
11
11
|
# import os
|
|
12
12
|
from dataclasses import dataclass, field
|
|
13
13
|
from pathlib import Path
|
|
14
|
-
from typing import
|
|
14
|
+
from typing import Collection
|
|
15
15
|
|
|
16
16
|
from webchanges import __doc__ as doc
|
|
17
17
|
from webchanges import __docs_url__, __project_name__, __version__
|
|
@@ -51,7 +51,7 @@ class CommandConfig(BaseConfig):
|
|
|
51
51
|
gc_database: int | None
|
|
52
52
|
hooks_files: list[Path]
|
|
53
53
|
install_chrome: bool
|
|
54
|
-
joblist:
|
|
54
|
+
joblist: Collection[str | int]
|
|
55
55
|
jobs_files: list[Path]
|
|
56
56
|
list_jobs: bool | str | None
|
|
57
57
|
log_file: Path
|
|
@@ -71,6 +71,7 @@ except ImportError as e: # pragma: no cover
|
|
|
71
71
|
# https://stackoverflow.com/questions/39740632
|
|
72
72
|
if TYPE_CHECKING:
|
|
73
73
|
from webchanges.handler import JobState
|
|
74
|
+
from webchanges.storage import _Config
|
|
74
75
|
|
|
75
76
|
|
|
76
77
|
logger = logging.getLogger(__name__)
|
|
@@ -123,7 +124,7 @@ class DifferBase(metaclass=TrackSubClasses):
|
|
|
123
124
|
"""
|
|
124
125
|
result: list[str] = []
|
|
125
126
|
for sc in TrackSubClasses.sorted_by_kind(cls):
|
|
126
|
-
#
|
|
127
|
+
# default_directive = getattr(sc, '__default_directive__', None)
|
|
127
128
|
result.extend((f' * {sc.__kind__} - {sc.__doc__}',))
|
|
128
129
|
if hasattr(sc, '__supported_directives__'):
|
|
129
130
|
for key, doc in sc.__supported_directives__.items():
|
|
@@ -136,30 +137,56 @@ class DifferBase(metaclass=TrackSubClasses):
|
|
|
136
137
|
cls,
|
|
137
138
|
differ_spec: dict[str, Any] | None,
|
|
138
139
|
job_index_number: int | None = None,
|
|
140
|
+
config: _Config | None = None,
|
|
139
141
|
) -> tuple[str, dict[str, Any]]:
|
|
140
142
|
"""Checks the differ_spec for its validity and applies default values.
|
|
141
143
|
|
|
142
144
|
:param differ_spec: The differ as entered by the user; use "unified" if empty.
|
|
143
145
|
:param job_index_number: The job index number.
|
|
144
|
-
:returns: A validated differ_kind,
|
|
146
|
+
:returns: A validated differ_kind, directives tuple.
|
|
145
147
|
"""
|
|
148
|
+
|
|
149
|
+
def directives_with_defaults(
|
|
150
|
+
differ_spec: str, directives: dict[str, Any], config: _Config | None = None
|
|
151
|
+
) -> dict[str, Any]:
|
|
152
|
+
"""Obtain differ subdirectives that also contains defaults from the configuration.
|
|
153
|
+
|
|
154
|
+
:param differ_kind: The differ kind.
|
|
155
|
+
:param directives: The differ directives as stated in the job.
|
|
156
|
+
:returns: directives inclusive of configuration defaults.
|
|
157
|
+
"""
|
|
158
|
+
if config is None:
|
|
159
|
+
logger.error('Cannot merge differ differdirectives with defaults as no config object was passed')
|
|
160
|
+
return directives
|
|
161
|
+
cfg = config.get('differ_defaults')
|
|
162
|
+
if isinstance(cfg, dict):
|
|
163
|
+
defaults: dict[str, Any] = cfg.get(differ_spec) # type: ignore[assignment]
|
|
164
|
+
if defaults:
|
|
165
|
+
for key, value in defaults.items():
|
|
166
|
+
if key not in directives:
|
|
167
|
+
directives[key] = value
|
|
168
|
+
return directives
|
|
169
|
+
|
|
146
170
|
differ_spec = differ_spec or {'name': 'unified'}
|
|
147
|
-
|
|
148
|
-
differ_kind =
|
|
171
|
+
directives = differ_spec.copy()
|
|
172
|
+
differ_kind = directives.pop('name', '')
|
|
149
173
|
if not differ_kind:
|
|
150
|
-
if list(
|
|
174
|
+
if list(directives.keys()) == ['command']:
|
|
151
175
|
differ_kind = 'command'
|
|
152
176
|
else:
|
|
153
177
|
raise ValueError(
|
|
154
178
|
f"Job {job_index_number}: Differ directive must have a 'name' sub-directive: {differ_spec}."
|
|
155
179
|
)
|
|
156
180
|
|
|
157
|
-
differcls = cls.__subclasses__.get(differ_kind, None)
|
|
181
|
+
differcls: DifferBase | None = cls.__subclasses__.get(differ_kind, None) # type: ignore[assignment]
|
|
158
182
|
if not differcls:
|
|
159
183
|
raise ValueError(f'Job {job_index_number}: No differ named {differ_kind}.')
|
|
160
184
|
|
|
185
|
+
if directives:
|
|
186
|
+
directives = directives_with_defaults(differ_kind, directives, config)
|
|
187
|
+
|
|
161
188
|
if hasattr(differcls, '__supported_directives__'):
|
|
162
|
-
provided_keys = set(
|
|
189
|
+
provided_keys = set(directives.keys())
|
|
163
190
|
allowed_keys = set(differcls.__supported_directives__.keys())
|
|
164
191
|
unknown_keys = provided_keys.difference(allowed_keys)
|
|
165
192
|
if unknown_keys and '<any>' not in allowed_keys:
|
|
@@ -168,7 +195,7 @@ class DifferBase(metaclass=TrackSubClasses):
|
|
|
168
195
|
f"{', '.join(unknown_keys)} (supported: {', '.join(sorted(allowed_keys))})."
|
|
169
196
|
)
|
|
170
197
|
|
|
171
|
-
return differ_kind,
|
|
198
|
+
return differ_kind, directives
|
|
172
199
|
|
|
173
200
|
@classmethod
|
|
174
201
|
def process(
|
|
@@ -1246,9 +1273,7 @@ class AIGoogleDiffer(DifferBase):
|
|
|
1246
1273
|
__kind__ = 'ai_google'
|
|
1247
1274
|
|
|
1248
1275
|
__supported_directives__ = {
|
|
1249
|
-
'model': (
|
|
1250
|
-
'model name from https://ai.google.dev/gemini-api/docs/models/gemini (default: gemini-1.5-flash-latest)'
|
|
1251
|
-
),
|
|
1276
|
+
'model': ('model name from https://ai.google.dev/gemini-api/docs/models/gemini (default: gemini-2.0-flash)'),
|
|
1252
1277
|
'system_instructions': (
|
|
1253
1278
|
'Optional tone and style instructions for the model (default: see documentation at'
|
|
1254
1279
|
'https://webchanges.readthedocs.io/en/stable/differs.html#ai-google-diff)'
|
|
@@ -1264,7 +1289,7 @@ class AIGoogleDiffer(DifferBase):
|
|
|
1264
1289
|
'tools': "data passed on to the API's 'tools' field (default: None)",
|
|
1265
1290
|
'unified': 'directives passed to the unified differ (default: None)',
|
|
1266
1291
|
}
|
|
1267
|
-
|
|
1292
|
+
__default_directive__ = 'model'
|
|
1268
1293
|
|
|
1269
1294
|
@staticmethod
|
|
1270
1295
|
def _send_to_model(
|
|
@@ -1279,7 +1304,7 @@ class AIGoogleDiffer(DifferBase):
|
|
|
1279
1304
|
if directives is None:
|
|
1280
1305
|
directives = {}
|
|
1281
1306
|
if 'model' not in directives:
|
|
1282
|
-
directives['model'] = 'gemini-
|
|
1307
|
+
directives['model'] = 'gemini-2.0-flash' # also for footer
|
|
1283
1308
|
model = directives.get('model')
|
|
1284
1309
|
timeout = directives.get('timeout', 300)
|
|
1285
1310
|
max_output_tokens = directives.get('max_output_tokens')
|
|
@@ -238,7 +238,7 @@ class JobState(ContextManager):
|
|
|
238
238
|
filtered_data, mime_type = FilterBase.auto_process(self, data, mime_type)
|
|
239
239
|
|
|
240
240
|
# Apply any specified filters
|
|
241
|
-
for filter_kind, subfilter in FilterBase.normalize_filter_list(self.job.
|
|
241
|
+
for filter_kind, subfilter in FilterBase.normalize_filter_list(self.job.filters, self.job.index_number):
|
|
242
242
|
filtered_data, mime_type = FilterBase.process(
|
|
243
243
|
filter_kind, subfilter, self, filtered_data, mime_type
|
|
244
244
|
)
|
|
@@ -293,6 +293,7 @@ class JobState(ContextManager):
|
|
|
293
293
|
report_kind: Literal['text', 'markdown', 'html'] = 'text',
|
|
294
294
|
differ: dict[str, Any] | None = None,
|
|
295
295
|
tz: ZoneInfo | None = None,
|
|
296
|
+
config: _Config | None = None,
|
|
296
297
|
) -> str:
|
|
297
298
|
"""Generates the job's diff and applies diff_filters to it (if any). Memoized.
|
|
298
299
|
|
|
@@ -305,14 +306,20 @@ class JobState(ContextManager):
|
|
|
305
306
|
return self.generated_diff[report_kind]
|
|
306
307
|
|
|
307
308
|
if report_kind not in self.unfiltered_diff:
|
|
308
|
-
differ_kind, subdiffer = DifferBase.normalize_differ(
|
|
309
|
+
differ_kind, subdiffer = DifferBase.normalize_differ(
|
|
310
|
+
differ or self.job.differ,
|
|
311
|
+
self.job.index_number,
|
|
312
|
+
config,
|
|
313
|
+
)
|
|
309
314
|
unfiltered_diff = DifferBase.process(differ_kind, subdiffer, self, report_kind, tz, self.unfiltered_diff)
|
|
310
315
|
self.unfiltered_diff.update(unfiltered_diff)
|
|
311
316
|
_generated_diff = self.unfiltered_diff[report_kind]
|
|
312
317
|
if _generated_diff:
|
|
313
318
|
# Apply any specified diff_filters
|
|
314
319
|
_mime_type = 'text/plain'
|
|
315
|
-
for filter_kind, subfilter in FilterBase.normalize_filter_list(
|
|
320
|
+
for filter_kind, subfilter in FilterBase.normalize_filter_list(
|
|
321
|
+
self.job.diff_filters, self.job.index_number
|
|
322
|
+
):
|
|
316
323
|
_generated_diff, _mime_type = FilterBase.process( # type: ignore[assignment]
|
|
317
324
|
filter_kind, subfilter, self, _generated_diff, _mime_type
|
|
318
325
|
)
|
|
@@ -363,7 +370,7 @@ class Report:
|
|
|
363
370
|
|
|
364
371
|
:param job_state: The JobState object with the information of the job run.
|
|
365
372
|
"""
|
|
366
|
-
if job_state.exception is not None and job_state.exception
|
|
373
|
+
if job_state.exception is not None and not isinstance(job_state.exception, NotModifiedError):
|
|
367
374
|
logger.info(
|
|
368
375
|
f'Job {job_state.job.index_number}: Got exception while processing job {job_state.job}',
|
|
369
376
|
exc_info=job_state.exception,
|
|
@@ -460,7 +467,7 @@ class Report:
|
|
|
460
467
|
if (
|
|
461
468
|
job_state.verb == 'changed'
|
|
462
469
|
and not self.config['display']['empty-diff']
|
|
463
|
-
and job_state.get_diff(tz=self.tz) == ''
|
|
470
|
+
and job_state.get_diff(tz=self.tz, config=self.config) == ''
|
|
464
471
|
):
|
|
465
472
|
return True
|
|
466
473
|
|
|
@@ -130,7 +130,6 @@ class JobBase(metaclass=TrackSubClasses):
|
|
|
130
130
|
_delay: float | None = None
|
|
131
131
|
additions_only: bool | None = None
|
|
132
132
|
block_elements: list[str] | None = None
|
|
133
|
-
chromium_revision: dict[str, int] | dict[str, str] | str | int | None = None # deprecated
|
|
134
133
|
compared_versions: int | None = None
|
|
135
134
|
contextlines: int | None = None
|
|
136
135
|
cookies: dict[str, str] | None = None
|
|
@@ -138,15 +137,13 @@ class JobBase(metaclass=TrackSubClasses):
|
|
|
138
137
|
data_as_json: bool | None = None
|
|
139
138
|
deletions_only: bool | None = None
|
|
140
139
|
differ: dict[str, Any] | None = None # added in 3.21
|
|
141
|
-
|
|
140
|
+
diff_filters: str | list[str | dict[str, Any]] | None = None
|
|
142
141
|
diff_tool: str | None = None # deprecated in 3.21
|
|
143
142
|
enabled: bool | None = None
|
|
144
143
|
encoding: str | None = None
|
|
145
|
-
|
|
144
|
+
filters: str | list[str | dict[str, Any]] | None = None
|
|
146
145
|
headers = Headers(encoding='utf-8')
|
|
147
146
|
http_client: Literal['httpx', 'requests'] | None = None
|
|
148
|
-
http_proxy: str | None = None
|
|
149
|
-
https_proxy: str | None = None
|
|
150
147
|
ignore_cached: bool | None = None
|
|
151
148
|
ignore_connection_errors: bool | None = None
|
|
152
149
|
ignore_default_args: bool | str | list[str] | None = None
|
|
@@ -172,6 +169,7 @@ class JobBase(metaclass=TrackSubClasses):
|
|
|
172
169
|
no_redirects: bool | None = None
|
|
173
170
|
note: str | None = None
|
|
174
171
|
params: str | list | dict | None = None
|
|
172
|
+
proxy: str | None = None
|
|
175
173
|
referer: str | None = None # Playwright
|
|
176
174
|
retries: int | None = None
|
|
177
175
|
ssl_no_verify: bool | None = None
|
|
@@ -191,6 +189,23 @@ class JobBase(metaclass=TrackSubClasses):
|
|
|
191
189
|
wait_until: Literal['commit', 'domcontentloaded', 'load', 'networkidle'] | None = None
|
|
192
190
|
|
|
193
191
|
def __init__(self, **kwargs: Any) -> None:
|
|
192
|
+
# backward-compatibility
|
|
193
|
+
if 'filter' in kwargs:
|
|
194
|
+
logger.info(f"Job {kwargs.get('index_number')}: Replacing deprecated directive 'filter' with 'filters'")
|
|
195
|
+
kwargs['filters'] = kwargs.pop('filter')
|
|
196
|
+
if 'diff_filter' in kwargs:
|
|
197
|
+
logger.info(
|
|
198
|
+
f"Job {kwargs.get('index_number')}: Replacing deprecated directive 'diff_filter' with 'diff_filter'"
|
|
199
|
+
)
|
|
200
|
+
kwargs['diff_filters'] = kwargs.pop('diff_filter')
|
|
201
|
+
if 'https_proxy' in kwargs:
|
|
202
|
+
logger.info(f"Job {kwargs.get('index_number')}: Replacing deprecated directive 'https_proxy' with 'proxy'")
|
|
203
|
+
kwargs['proxy'] = kwargs.pop('https_proxy')
|
|
204
|
+
kwargs.pop('http_proxy', None)
|
|
205
|
+
elif 'http_proxy' in kwargs:
|
|
206
|
+
logger.info(f"Job {kwargs.get('index_number')}: Replacing deprecated directive 'http_proxy' with 'proxy'")
|
|
207
|
+
kwargs['proxy'] = kwargs.pop('http_proxy')
|
|
208
|
+
|
|
194
209
|
# Fail if any required keys are not provided
|
|
195
210
|
for k in self.__required__:
|
|
196
211
|
if k not in kwargs:
|
|
@@ -348,7 +363,7 @@ class JobBase(metaclass=TrackSubClasses):
|
|
|
348
363
|
return job
|
|
349
364
|
|
|
350
365
|
def to_dict(self) -> dict:
|
|
351
|
-
"""Return all
|
|
366
|
+
"""Return all defined (not None) Job object directives, required and optional, as a serializable dict,
|
|
352
367
|
converting Headers object (which are not JSON serializable) to dicts.
|
|
353
368
|
|
|
354
369
|
:returns: A dict with all job directives as keys, ignoring those that are extras.
|
|
@@ -369,7 +384,8 @@ class JobBase(metaclass=TrackSubClasses):
|
|
|
369
384
|
:returns: A JobBase type object.
|
|
370
385
|
"""
|
|
371
386
|
for k in data.keys():
|
|
372
|
-
|
|
387
|
+
# backward-compatibility
|
|
388
|
+
if k not in cls.__required__ + cls.__optional__ + ('filter', 'diff_filter', 'http_client', 'http_proxy'):
|
|
373
389
|
if len(filenames) > 1:
|
|
374
390
|
jobs_files = ['in the concatenation of the jobs files:'] + [f'• {file},' for file in filenames]
|
|
375
391
|
elif len(filenames) == 1:
|
|
@@ -535,6 +551,22 @@ class JobBase(metaclass=TrackSubClasses):
|
|
|
535
551
|
if self.monospace is None:
|
|
536
552
|
self.monospace = True
|
|
537
553
|
|
|
554
|
+
def get_proxy(self) -> str | None:
|
|
555
|
+
"""Check that URL is http or https and return proxy value."""
|
|
556
|
+
scheme = urlsplit(self.url).scheme
|
|
557
|
+
if scheme not in {'http', 'https'}:
|
|
558
|
+
raise ValueError(
|
|
559
|
+
f'Job {self.index_number}: URL should start with https:// or http:// (check for typos): {self.url}'
|
|
560
|
+
)
|
|
561
|
+
proxy = self.proxy
|
|
562
|
+
if proxy is None:
|
|
563
|
+
if os.getenv((scheme + '_proxy').upper()):
|
|
564
|
+
proxy = os.getenv((scheme + '_proxy').upper())
|
|
565
|
+
logger.debug(
|
|
566
|
+
f"Job {self.index_number}: Setting proxy from environment variable {(scheme + '_proxy').upper()}"
|
|
567
|
+
)
|
|
568
|
+
return proxy
|
|
569
|
+
|
|
538
570
|
|
|
539
571
|
class Job(JobBase):
|
|
540
572
|
"""Job class for jobs."""
|
|
@@ -546,10 +578,10 @@ class Job(JobBase):
|
|
|
546
578
|
'contextlines',
|
|
547
579
|
'deletions_only',
|
|
548
580
|
'differ',
|
|
549
|
-
'
|
|
581
|
+
'diff_filters',
|
|
550
582
|
'diff_tool', # deprecated in 3.21
|
|
551
583
|
'enabled',
|
|
552
|
-
'
|
|
584
|
+
'filters',
|
|
553
585
|
'index_number',
|
|
554
586
|
'is_markdown',
|
|
555
587
|
'kind', # hooks.py
|
|
@@ -672,14 +704,12 @@ class UrlJob(UrlJobBase):
|
|
|
672
704
|
'data_as_json',
|
|
673
705
|
'encoding',
|
|
674
706
|
'headers',
|
|
675
|
-
'http_client',
|
|
676
|
-
'http_proxy',
|
|
677
|
-
'https_proxy',
|
|
678
707
|
'ignore_cached',
|
|
679
708
|
'ignore_dh_key_too_small',
|
|
680
709
|
'method',
|
|
681
710
|
'no_redirects',
|
|
682
711
|
'params',
|
|
712
|
+
'proxy',
|
|
683
713
|
'retries',
|
|
684
714
|
'ssl_no_verify',
|
|
685
715
|
'timeout',
|
|
@@ -716,17 +746,10 @@ class UrlJob(UrlJobBase):
|
|
|
716
746
|
f'Job {self.index_number}: Using the HTTPX HTTP client library (HTTP/2 support is not available since '
|
|
717
747
|
f'h2 is not installed)'
|
|
718
748
|
)
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
if
|
|
722
|
-
|
|
723
|
-
f'Job {self.index_number}: URL should start with https:// or http:// (check for typos): {self.url}'
|
|
724
|
-
)
|
|
725
|
-
if getattr(self, scheme + '_proxy'):
|
|
726
|
-
proxy = getattr(self, scheme + '_proxy')
|
|
727
|
-
elif os.getenv((scheme + '_proxy').upper()):
|
|
728
|
-
proxy = os.getenv((scheme + '_proxy').upper())
|
|
729
|
-
logger.debug(f'Job {self.index_number}: Proxies: {proxy}')
|
|
749
|
+
|
|
750
|
+
proxy = self.get_proxy()
|
|
751
|
+
if proxy is not None:
|
|
752
|
+
logger.debug(f'Job {self.index_number}: Proxy: {proxy}')
|
|
730
753
|
|
|
731
754
|
if self.ignore_dh_key_too_small:
|
|
732
755
|
logger.debug(
|
|
@@ -795,7 +818,7 @@ class UrlJob(UrlJobBase):
|
|
|
795
818
|
etag = response.headers.get('ETag', '')
|
|
796
819
|
mime_type = response.headers.get('Content-Type', '').split(';')[0]
|
|
797
820
|
|
|
798
|
-
if FilterBase.filter_chain_needs_bytes(self.
|
|
821
|
+
if FilterBase.filter_chain_needs_bytes(self.filters):
|
|
799
822
|
return response.content, etag, mime_type
|
|
800
823
|
|
|
801
824
|
if self.encoding:
|
|
@@ -812,13 +835,13 @@ class UrlJob(UrlJobBase):
|
|
|
812
835
|
:raises NotModifiedError: If an HTTP 304 response is received.
|
|
813
836
|
"""
|
|
814
837
|
logger.info(f'Job {self.index_number}: Using the requests HTTP client library')
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
proxies = {scheme:
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
838
|
+
proxy_str = self.get_proxy()
|
|
839
|
+
if proxy_str is not None:
|
|
840
|
+
scheme = urlsplit(self.url).scheme
|
|
841
|
+
proxies = {scheme: proxy_str}
|
|
842
|
+
logger.debug(f'Job {self.index_number}: Proxies: {proxies}')
|
|
843
|
+
else:
|
|
844
|
+
proxies = None
|
|
822
845
|
|
|
823
846
|
if self.ssl_no_verify:
|
|
824
847
|
# required to suppress warnings with 'ssl_no_verify: true'
|
|
@@ -901,7 +924,7 @@ class UrlJob(UrlJobBase):
|
|
|
901
924
|
etag = response.headers.get('ETag', '')
|
|
902
925
|
mime_type = response.headers.get('Content-Type', '').split(';')[0]
|
|
903
926
|
|
|
904
|
-
if FilterBase.filter_chain_needs_bytes(self.
|
|
927
|
+
if FilterBase.filter_chain_needs_bytes(self.filters):
|
|
905
928
|
return response.content, etag, mime_type
|
|
906
929
|
|
|
907
930
|
if self.encoding:
|
|
@@ -939,7 +962,7 @@ class UrlJob(UrlJobBase):
|
|
|
939
962
|
else:
|
|
940
963
|
filename = Path(str(urlparse(self.url).path))
|
|
941
964
|
|
|
942
|
-
if FilterBase.filter_chain_needs_bytes(self.
|
|
965
|
+
if FilterBase.filter_chain_needs_bytes(self.filters):
|
|
943
966
|
return filename.read_bytes(), '', 'application/octet-stream'
|
|
944
967
|
else:
|
|
945
968
|
return filename.read_text(), '', 'text/plain'
|
|
@@ -955,7 +978,7 @@ class UrlJob(UrlJobBase):
|
|
|
955
978
|
str(password),
|
|
956
979
|
timeout=self.timeout,
|
|
957
980
|
) as ftp:
|
|
958
|
-
if FilterBase.filter_chain_needs_bytes(self.
|
|
981
|
+
if FilterBase.filter_chain_needs_bytes(self.filters):
|
|
959
982
|
data_bytes = b''
|
|
960
983
|
|
|
961
984
|
def callback_bytes(dt: bytes) -> None:
|
|
@@ -1020,14 +1043,27 @@ class UrlJob(UrlJobBase):
|
|
|
1020
1043
|
logger.debug(f'Job {self.index_number}: Cookies: {self.cookies}')
|
|
1021
1044
|
|
|
1022
1045
|
if self.http_client == 'requests' or not httpx:
|
|
1046
|
+
if isinstance(requests, str):
|
|
1047
|
+
message = f'Job {job_state.job.index_number} cannot be run '
|
|
1048
|
+
if self.http_client == 'requests':
|
|
1049
|
+
message += "with 'http_client: requests "
|
|
1050
|
+
message += (
|
|
1051
|
+
f'( {self.get_indexed_location()} ):\n{requests}\n'
|
|
1052
|
+
f"Please install module using e.g. 'pip install --upgrade webchanges[requests]'."
|
|
1053
|
+
)
|
|
1054
|
+
raise ImportError(message)
|
|
1023
1055
|
job_state._http_client_used = 'requests'
|
|
1024
1056
|
data, etag, mime_type = self._retrieve_requests(headers=headers, timeout=timeout)
|
|
1025
1057
|
elif not self.http_client or self.http_client == 'httpx':
|
|
1026
1058
|
if isinstance(httpx, str):
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
|
|
1059
|
+
message = f'Job {job_state.job.index_number} cannot be run '
|
|
1060
|
+
if self.http_client == 'httpx':
|
|
1061
|
+
message += "with 'http_client: httpx "
|
|
1062
|
+
message += (
|
|
1063
|
+
f'( {self.get_indexed_location()} ):\n{httpx}\n'
|
|
1064
|
+
f"Please install module using e.g. 'pip install --upgrade httpx[http2,zstd]'."
|
|
1030
1065
|
)
|
|
1066
|
+
raise ImportError(message)
|
|
1031
1067
|
job_state._http_client_used = 'HTTPX'
|
|
1032
1068
|
data, etag, mime_type = self._retrieve_httpx(headers=headers, timeout=timeout)
|
|
1033
1069
|
else:
|
|
@@ -1070,7 +1106,7 @@ class UrlJob(UrlJobBase):
|
|
|
1070
1106
|
# Instead of a full traceback, just show the error
|
|
1071
1107
|
exception_str = str(exception).strip()
|
|
1072
1108
|
print(f'{exception_str=} {exception.args=} {type(exception)=}')
|
|
1073
|
-
if
|
|
1109
|
+
if self.proxy and (
|
|
1074
1110
|
(httpx and isinstance(exception, httpx.TransportError))
|
|
1075
1111
|
or any(
|
|
1076
1112
|
exception_str.startswith(error_string)
|
|
@@ -1096,7 +1132,7 @@ class UrlJob(UrlJobBase):
|
|
|
1096
1132
|
)
|
|
1097
1133
|
)
|
|
1098
1134
|
):
|
|
1099
|
-
exception_str += f'\n\n(Job has proxy {self.
|
|
1135
|
+
exception_str += f'\n\n(Job has proxy {self.proxy})'
|
|
1100
1136
|
return exception_str
|
|
1101
1137
|
return tb
|
|
1102
1138
|
|
|
@@ -1160,13 +1196,10 @@ class BrowserJob(UrlJobBase):
|
|
|
1160
1196
|
__required__: tuple[str, ...] = ('use_browser',)
|
|
1161
1197
|
__optional__: tuple[str, ...] = (
|
|
1162
1198
|
'block_elements',
|
|
1163
|
-
'chromium_revision', # deprecated
|
|
1164
1199
|
'cookies',
|
|
1165
1200
|
'data',
|
|
1166
1201
|
'data_as_json',
|
|
1167
1202
|
'headers',
|
|
1168
|
-
'http_proxy',
|
|
1169
|
-
'https_proxy',
|
|
1170
1203
|
'ignore_default_args', # Playwright
|
|
1171
1204
|
'ignore_https_errors',
|
|
1172
1205
|
'init_script', # Playwright,
|
|
@@ -1175,6 +1208,7 @@ class BrowserJob(UrlJobBase):
|
|
|
1175
1208
|
'method',
|
|
1176
1209
|
'navigate',
|
|
1177
1210
|
'params',
|
|
1211
|
+
'proxy',
|
|
1178
1212
|
'switches',
|
|
1179
1213
|
'timeout',
|
|
1180
1214
|
'user_data_dir',
|
|
@@ -1225,7 +1259,7 @@ class BrowserJob(UrlJobBase):
|
|
|
1225
1259
|
try:
|
|
1226
1260
|
from playwright._repo_version import version as playwright_version
|
|
1227
1261
|
from playwright.sync_api import Error as PlaywrightError
|
|
1228
|
-
from playwright.sync_api import
|
|
1262
|
+
from playwright.sync_api import Route, sync_playwright
|
|
1229
1263
|
except ImportError: # pragma: no cover
|
|
1230
1264
|
raise ImportError(
|
|
1231
1265
|
f"Python package 'playwright' is not installed; cannot run jobs with the 'use_browser: true' "
|
|
@@ -1272,26 +1306,21 @@ class BrowserJob(UrlJobBase):
|
|
|
1272
1306
|
|
|
1273
1307
|
headers = self.get_headers(job_state, user_agent=None)
|
|
1274
1308
|
|
|
1275
|
-
|
|
1276
|
-
if
|
|
1277
|
-
|
|
1278
|
-
|
|
1279
|
-
|
|
1280
|
-
|
|
1281
|
-
|
|
1282
|
-
|
|
1283
|
-
|
|
1284
|
-
proxy_split
|
|
1285
|
-
|
|
1286
|
-
|
|
1287
|
-
|
|
1288
|
-
|
|
1289
|
-
|
|
1290
|
-
else ''
|
|
1291
|
-
),
|
|
1292
|
-
'username': str(proxy_split.username),
|
|
1293
|
-
'password': str(proxy_split.password),
|
|
1294
|
-
}
|
|
1309
|
+
proxy_str = self.get_proxy()
|
|
1310
|
+
if proxy_str is not None:
|
|
1311
|
+
proxy_split: SplitResult | SplitResultBytes = urlsplit(proxy_str)
|
|
1312
|
+
proxy = {
|
|
1313
|
+
'server': (
|
|
1314
|
+
f'{proxy_split.scheme!s}://{proxy_split.hostname!s}:{proxy_split.port!s}'
|
|
1315
|
+
if proxy_split.port
|
|
1316
|
+
else ''
|
|
1317
|
+
),
|
|
1318
|
+
'username': str(proxy_split.username),
|
|
1319
|
+
'password': str(proxy_split.password),
|
|
1320
|
+
}
|
|
1321
|
+
logger.debug(f'Job {self.index_number}: Proxy: {proxy}')
|
|
1322
|
+
else:
|
|
1323
|
+
proxy = None
|
|
1295
1324
|
|
|
1296
1325
|
if self.switches:
|
|
1297
1326
|
if isinstance(self.switches, str):
|
|
@@ -1784,8 +1813,8 @@ class BrowserJob(UrlJobBase):
|
|
|
1784
1813
|
"""
|
|
1785
1814
|
exception_str = f'Browser error in {str(exception).strip()}'
|
|
1786
1815
|
print(f'{exception_str=}, {tb=}')
|
|
1787
|
-
if
|
|
1788
|
-
exception_str += f'\n\n(Job has proxy {self.
|
|
1816
|
+
if self.proxy and 'net::ERR' in exception_str:
|
|
1817
|
+
exception_str += f'\n\n(Job has proxy {self.proxy})'
|
|
1789
1818
|
return exception_str
|
|
1790
1819
|
return exception_str
|
|
1791
1820
|
|
|
@@ -1936,7 +1965,7 @@ class ShellJob(Job):
|
|
|
1936
1965
|
:raises subprocess.TimeoutExpired: Subclass of SubprocessError, raised when a timeout expires while waiting for
|
|
1937
1966
|
a child process.
|
|
1938
1967
|
"""
|
|
1939
|
-
needs_bytes = FilterBase.filter_chain_needs_bytes(self.
|
|
1968
|
+
needs_bytes = FilterBase.filter_chain_needs_bytes(self.filters)
|
|
1940
1969
|
try:
|
|
1941
1970
|
return (
|
|
1942
1971
|
subprocess.run(
|
|
@@ -466,7 +466,7 @@ class TextReporter(ReporterBase):
|
|
|
466
466
|
location = job_state.job.get_location()
|
|
467
467
|
if pretty_name != location:
|
|
468
468
|
location = f'{pretty_name} ({location})'
|
|
469
|
-
yield ': '.join((job_state.verb.upper(), location))
|
|
469
|
+
yield ': '.join((job_state.verb.replace('_', ' ').upper(), location))
|
|
470
470
|
if hasattr(job_state.job, 'note') and job_state.job.note:
|
|
471
471
|
yield job_state.job.note
|
|
472
472
|
return
|
|
@@ -549,8 +549,8 @@ class TextReporter(ReporterBase):
|
|
|
549
549
|
location = job_state.job.get_location()
|
|
550
550
|
if pretty_name != location:
|
|
551
551
|
location = f'{pretty_name} ({location})'
|
|
552
|
-
pretty_summary = ': '.join((job_state.verb.upper(), pretty_name))
|
|
553
|
-
summary = ': '.join((job_state.verb.upper(), location))
|
|
552
|
+
pretty_summary = ': '.join((job_state.verb.replace('_', ' ').upper(), pretty_name))
|
|
553
|
+
summary = ': '.join((job_state.verb.replace('_', ' ').upper(), location))
|
|
554
554
|
differ = job_state.job.differ or {}
|
|
555
555
|
content = self._format_content(job_state, differ)
|
|
556
556
|
# self._format_content may update verb to 'changed,no_report'
|
|
@@ -597,7 +597,7 @@ class MarkdownReporter(ReporterBase):
|
|
|
597
597
|
location = job_state.job.get_location()
|
|
598
598
|
if pretty_name != location:
|
|
599
599
|
location = f'{pretty_name} ({location})'
|
|
600
|
-
yield f"* {': '.join((job_state.verb.upper(), location))}"
|
|
600
|
+
yield f"* {': '.join((job_state.verb.replace('_', ' ').upper(), location))}"
|
|
601
601
|
if hasattr(job_state.job, 'note') and job_state.job.note:
|
|
602
602
|
yield job_state.job.note
|
|
603
603
|
return
|
|
@@ -812,8 +812,8 @@ class MarkdownReporter(ReporterBase):
|
|
|
812
812
|
else:
|
|
813
813
|
location = f'{pretty_name} ({location})'
|
|
814
814
|
|
|
815
|
-
pretty_summary = ': '.join((job_state.verb.upper(), pretty_name))
|
|
816
|
-
summary = ': '.join((job_state.verb.upper(), location))
|
|
815
|
+
pretty_summary = ': '.join((job_state.verb.replace('_', ' ').upper(), pretty_name))
|
|
816
|
+
summary = ': '.join((job_state.verb.replace('_', ' ').upper(), location))
|
|
817
817
|
differ = job_state.job.differ or {}
|
|
818
818
|
content = self._format_content(job_state, differ) # may update verb to 'changed,no_report'
|
|
819
819
|
if job_state.verb == 'changed,no_report':
|
|
@@ -287,6 +287,20 @@ _ConfigJobDefaults = TypedDict(
|
|
|
287
287
|
},
|
|
288
288
|
total=False,
|
|
289
289
|
)
|
|
290
|
+
_ConfigDifferDefaults = TypedDict(
|
|
291
|
+
'_ConfigDifferDefaults',
|
|
292
|
+
{
|
|
293
|
+
'_note': str,
|
|
294
|
+
'unified': dict[str, Any],
|
|
295
|
+
'ai_google': dict[str, Any],
|
|
296
|
+
'command': dict[str, Any],
|
|
297
|
+
'deepdiff': dict[str, Any],
|
|
298
|
+
'image': dict[str, Any],
|
|
299
|
+
'table': dict[str, Any],
|
|
300
|
+
'wdiff': dict[str, Any],
|
|
301
|
+
},
|
|
302
|
+
total=False,
|
|
303
|
+
)
|
|
290
304
|
_ConfigDatabase = TypedDict(
|
|
291
305
|
'_ConfigDatabase',
|
|
292
306
|
{
|
|
@@ -300,6 +314,7 @@ _Config = TypedDict(
|
|
|
300
314
|
'display': _ConfigDisplay,
|
|
301
315
|
'report': _ConfigReport,
|
|
302
316
|
'job_defaults': _ConfigJobDefaults,
|
|
317
|
+
'differ_defaults': _ConfigDifferDefaults,
|
|
303
318
|
'database': _ConfigDatabase,
|
|
304
319
|
'footnote': str | None,
|
|
305
320
|
},
|
|
@@ -447,6 +462,16 @@ DEFAULT_CONFIG: _Config = {
|
|
|
447
462
|
'browser': {'_note': "These are used for 'url' jobs with 'use_browser: true'."},
|
|
448
463
|
'command': {'_note': "These are used for 'command' jobs."},
|
|
449
464
|
},
|
|
465
|
+
'differ_defaults': {
|
|
466
|
+
'_note': 'Default directives that are applied to individual differs.',
|
|
467
|
+
'unified': {},
|
|
468
|
+
'ai_google': {},
|
|
469
|
+
'command': {},
|
|
470
|
+
'deepdiff': {},
|
|
471
|
+
'image': {},
|
|
472
|
+
'table': {},
|
|
473
|
+
'wdiff': {},
|
|
474
|
+
},
|
|
450
475
|
'database': {
|
|
451
476
|
'engine': 'sqlite3',
|
|
452
477
|
'max_snapshots': 4,
|
|
@@ -609,7 +634,7 @@ class JobsBaseFileStorage(BaseTextualFileStorage, ABC):
|
|
|
609
634
|
if isinstance(job, ShellJob):
|
|
610
635
|
return True
|
|
611
636
|
|
|
612
|
-
for filter_kind,
|
|
637
|
+
for filter_kind, _ in FilterBase.normalize_filter_list(job.filters, job.index_number):
|
|
613
638
|
if filter_kind == 'shellpipe':
|
|
614
639
|
return True
|
|
615
640
|
|
|
@@ -721,19 +746,17 @@ class YamlConfigStorage(BaseYamlFileStorage):
|
|
|
721
746
|
:param config: The configuration.
|
|
722
747
|
:raises ValueError: If the configuration has keys not in DEFAULT_CONFIG (bad keys, e.g. typos)
|
|
723
748
|
"""
|
|
724
|
-
for key in {'chromium_revision'}:
|
|
725
|
-
if key in config['job_defaults']['all'] or key in config['job_defaults']['browser']:
|
|
726
|
-
warnings.warn(
|
|
727
|
-
f'Directive {key} found in the configuration file {self.filename} has been deprecated'
|
|
728
|
-
f'with the use of Playright. Please delete it (webchanges --edit-config)',
|
|
729
|
-
DeprecationWarning,
|
|
730
|
-
)
|
|
731
|
-
|
|
732
749
|
config_for_extras = copy.deepcopy(config)
|
|
733
750
|
if 'job_defaults' in config_for_extras:
|
|
734
751
|
# Create missing 'job_defaults' keys from DEFAULT_CONFIG
|
|
735
752
|
for key in DEFAULT_CONFIG['job_defaults']:
|
|
753
|
+
if 'job_defaults' not in config_for_extras:
|
|
754
|
+
config_for_extras['job_defaults'] = {}
|
|
736
755
|
config_for_extras['job_defaults'][key] = None # type: ignore[literal-required]
|
|
756
|
+
for key in DEFAULT_CONFIG['differ_defaults']:
|
|
757
|
+
if 'differ_defaults' not in config_for_extras:
|
|
758
|
+
config_for_extras['differ_defaults'] = {}
|
|
759
|
+
config_for_extras['differ_defaults'][key] = None # type: ignore[literal-required]
|
|
737
760
|
if 'hooks' in sys.modules:
|
|
738
761
|
# Remove extra keys in config used in hooks (they are not in DEFAULT_CONFIG)
|
|
739
762
|
for _, obj in inspect.getmembers(
|
|
@@ -742,6 +765,8 @@ class YamlConfigStorage(BaseYamlFileStorage):
|
|
|
742
765
|
if issubclass(obj, JobBase):
|
|
743
766
|
if obj.__kind__ not in DEFAULT_CONFIG['job_defaults'].keys():
|
|
744
767
|
config_for_extras['job_defaults'].pop(obj.__kind__, None) # type: ignore[misc]
|
|
768
|
+
elif obj.__kind__ not in DEFAULT_CONFIG['job_defaults'].keys():
|
|
769
|
+
config_for_extras['job_defaults'].pop(obj.__kind__, None) # type: ignore[misc]
|
|
745
770
|
elif issubclass(obj, ReporterBase):
|
|
746
771
|
if obj.__kind__ not in DEFAULT_CONFIG['report'].keys():
|
|
747
772
|
config_for_extras['report'].pop(obj.__kind__, None) # type: ignore[misc]
|
|
@@ -908,11 +933,11 @@ class YamlJobsStorage(BaseYamlFileStorage, JobsBaseFileStorage):
|
|
|
908
933
|
+ job_files_for_error()
|
|
909
934
|
)
|
|
910
935
|
)
|
|
911
|
-
if not isinstance(job.
|
|
936
|
+
if not isinstance(job.filters, (NoneType, list)):
|
|
912
937
|
raise ValueError(
|
|
913
938
|
'\n '.join(
|
|
914
939
|
[
|
|
915
|
-
f"The 'filter' key needs to contain a list; found a {type(job.
|
|
940
|
+
f"The 'filter' key needs to contain a list; found a {type(job.filters).__name__} ",
|
|
916
941
|
f'in {job.get_indexed_location()}',
|
|
917
942
|
]
|
|
918
943
|
+ job_files_for_error()
|
|
@@ -108,7 +108,7 @@ def run_jobs(urlwatcher: Urlwatch) -> None:
|
|
|
108
108
|
if job_state.tries > 0:
|
|
109
109
|
job_state.tries = 0
|
|
110
110
|
job_state.save()
|
|
111
|
-
if job_state.old_error_data:
|
|
111
|
+
if job_state.old_error_data and job_state.job.suppress_repeated_errors:
|
|
112
112
|
urlwatcher.report.unchanged_from_error(job_state)
|
|
113
113
|
else:
|
|
114
114
|
urlwatcher.report.unchanged(job_state)
|
|
@@ -140,7 +140,7 @@ def run_jobs(urlwatcher: Urlwatch) -> None:
|
|
|
140
140
|
if job_state.tries > 0:
|
|
141
141
|
job_state.tries = 0
|
|
142
142
|
job_state.save()
|
|
143
|
-
if job_state.old_error_data:
|
|
143
|
+
if job_state.old_error_data and job_state.job.suppress_repeated_errors:
|
|
144
144
|
urlwatcher.report.unchanged_from_error(job_state)
|
|
145
145
|
else:
|
|
146
146
|
urlwatcher.report.unchanged(job_state)
|
|
@@ -195,7 +195,7 @@ def run_jobs(urlwatcher: Urlwatch) -> None:
|
|
|
195
195
|
if urlwatcher.urlwatch_config.max_workers:
|
|
196
196
|
max_workers = urlwatcher.urlwatch_config.max_workers
|
|
197
197
|
else:
|
|
198
|
-
max_workers = max(int(virt_mem /
|
|
198
|
+
max_workers = max(int(virt_mem / 400e6), 1)
|
|
199
199
|
max_workers = min(max_workers, os.cpu_count() or 1)
|
|
200
200
|
logger.debug(
|
|
201
201
|
f"Running jobs that require Chrome (i.e. with 'use_browser: true') in parallel with {max_workers} "
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: webchanges
|
|
3
|
-
Version: 3.
|
|
3
|
+
Version: 3.28.1
|
|
4
4
|
Summary: Web Changes Delivered. AI-Summarized. Totally Anonymous.
|
|
5
5
|
Author-email: Mike Borsetti <mike+webchanges@borsetti.com>
|
|
6
6
|
Maintainer-email: Mike Borsetti <mike+webchanges@borsetti.com>
|
|
@@ -114,6 +114,7 @@ Requires-Dist: msgpack
|
|
|
114
114
|
Requires-Dist: platformdirs
|
|
115
115
|
Requires-Dist: pyyaml
|
|
116
116
|
Requires-Dist: tzdata; sys_platform == "win32"
|
|
117
|
+
Requires-Dist: zstandard
|
|
117
118
|
Provides-Extra: use-browser
|
|
118
119
|
Requires-Dist: playwright; extra == "use-browser"
|
|
119
120
|
Requires-Dist: psutil; extra == "use-browser"
|
|
@@ -160,10 +161,8 @@ Provides-Extra: requests
|
|
|
160
161
|
Requires-Dist: requests; extra == "requests"
|
|
161
162
|
Provides-Extra: safe-password
|
|
162
163
|
Requires-Dist: keyring; extra == "safe-password"
|
|
163
|
-
Provides-Extra: zstd
|
|
164
|
-
Requires-Dist: zstandard; extra == "zstd"
|
|
165
164
|
Provides-Extra: all
|
|
166
|
-
Requires-Dist: webchanges[beautify,bs4,deepdiff_xml,html5lib,ical2text,imagediff,jq,matrix,ocr,pdf2text,pushbullet,pushover,pypdf_crypto,redis,requests,safe_password,use_browser,xmpp
|
|
165
|
+
Requires-Dist: webchanges[beautify,bs4,deepdiff_xml,html5lib,ical2text,imagediff,jq,matrix,ocr,pdf2text,pushbullet,pushover,pypdf_crypto,redis,requests,safe_password,use_browser,xmpp]; extra == "all"
|
|
167
166
|
|
|
168
167
|
.. role:: underline
|
|
169
168
|
:class: underline
|
|
@@ -7,13 +7,14 @@ markdown2
|
|
|
7
7
|
msgpack
|
|
8
8
|
platformdirs
|
|
9
9
|
pyyaml
|
|
10
|
+
zstandard
|
|
10
11
|
|
|
11
12
|
[:sys_platform == "win32"]
|
|
12
13
|
colorama
|
|
13
14
|
tzdata
|
|
14
15
|
|
|
15
16
|
[all]
|
|
16
|
-
webchanges[beautify,bs4,deepdiff_xml,html5lib,ical2text,imagediff,jq,matrix,ocr,pdf2text,pushbullet,pushover,pypdf_crypto,redis,requests,safe_password,use_browser,xmpp
|
|
17
|
+
webchanges[beautify,bs4,deepdiff_xml,html5lib,ical2text,imagediff,jq,matrix,ocr,pdf2text,pushbullet,pushover,pypdf_crypto,redis,requests,safe_password,use_browser,xmpp]
|
|
17
18
|
|
|
18
19
|
[beautify]
|
|
19
20
|
beautifulsoup4
|
|
@@ -82,6 +83,3 @@ psutil
|
|
|
82
83
|
|
|
83
84
|
[xmpp]
|
|
84
85
|
aioxmpp
|
|
85
|
-
|
|
86
|
-
[zstd]
|
|
87
|
-
zstandard
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|