udata 12.0.2.dev10__py3-none-any.whl → 12.0.2.dev12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- udata/commands/__init__.py +0 -1
- udata/core/dataset/api.py +0 -15
- udata/core/dataset/models.py +8 -38
- udata/entrypoints.py +0 -1
- udata/models/__init__.py +0 -2
- udata/mongo/extras_fields.py +4 -3
- udata/settings.py +0 -10
- {udata-12.0.2.dev10.dist-info → udata-12.0.2.dev12.dist-info}/METADATA +1 -3
- {udata-12.0.2.dev10.dist-info → udata-12.0.2.dev12.dist-info}/RECORD +13 -20
- udata/linkchecker/__init__.py +0 -0
- udata/linkchecker/backends.py +0 -31
- udata/linkchecker/checker.py +0 -75
- udata/linkchecker/commands.py +0 -21
- udata/linkchecker/models.py +0 -9
- udata/linkchecker/tasks.py +0 -55
- udata/tests/test_linkchecker.py +0 -277
- {udata-12.0.2.dev10.dist-info → udata-12.0.2.dev12.dist-info}/WHEEL +0 -0
- {udata-12.0.2.dev10.dist-info → udata-12.0.2.dev12.dist-info}/entry_points.txt +0 -0
- {udata-12.0.2.dev10.dist-info → udata-12.0.2.dev12.dist-info}/licenses/LICENSE +0 -0
- {udata-12.0.2.dev10.dist-info → udata-12.0.2.dev12.dist-info}/top_level.txt +0 -0
udata/commands/__init__.py
CHANGED
udata/core/dataset/api.py
CHANGED
@@ -46,7 +46,6 @@ from udata.core.storages.api import handle_upload, upload_parser
 from udata.core.topic.models import Topic
 from udata.frontend.markdown import md
 from udata.i18n import gettext as _
-from udata.linkchecker.checker import check_resource
 from udata.rdf import RDF_EXTENSIONS, graph_response, negociate_content
 from udata.utils import get_by

@@ -902,20 +901,6 @@ class AllowedExtensionsAPI(API):
         return sorted(current_app.config["ALLOWED_RESOURCES_EXTENSIONS"])


-@ns.route(
-    "/<dataset:dataset>/resources/<uuid:rid>/check/",
-    endpoint="check_dataset_resource",
-    doc=common_doc,
-)
-@api.param("rid", "The resource unique identifier")
-class CheckDatasetResource(API, ResourceMixin):
-    @api.doc("check_dataset_resource")
-    def get(self, dataset, rid):
-        """Checks that a resource's URL exists and returns metadata."""
-        resource = self.get_resource_or_404(dataset, rid)
-        return check_resource(resource)
-
-
 @ns.route("/resource_types/", endpoint="resource_types")
 class ResourceTypesAPI(API):
     @api.doc("resource_types")
udata/core/dataset/models.py
CHANGED
@@ -1,6 +1,6 @@
 import logging
 import re
-from datetime import datetime
+from datetime import datetime
 from pydoc import locate
 from typing import Self
 from urllib.parse import urlparse
@@ -8,7 +8,6 @@ from urllib.parse import urlparse
 import Levenshtein
 import requests
 from blinker import signal
-from dateutil.parser import parse as parse_dt
 from flask import current_app, url_for
 from mongoengine import ValidationError as MongoEngineValidationError
 from mongoengine.fields import DateTimeField
@@ -369,7 +368,13 @@ class ResourceMixin(object):
     mime = db.StringField()
     filesize = db.IntField() # `size` is a reserved keyword for mongoengine.
     fs_filename = db.StringField()
-    extras = db.ExtrasField(
+    extras = db.ExtrasField(
+        {
+            "check:available": db.BooleanField,
+            "check:status": db.IntField,
+            "check:date": db.DateTimeField,
+        }
+    )
     harvest = db.EmbeddedDocumentField(HarvestResourceMetadata)
     schema = db.EmbeddedDocumentField(Schema)

@@ -428,41 +433,6 @@ class ResourceMixin(object):
         """
         return self.extras.get("check:available", "unknown")

-    def need_check(self):
-        """Does the resource needs to be checked against its linkchecker?
-
-        We check unavailable resources often, unless they go over the
-        threshold. Available resources are checked less and less frequently
-        based on their historical availability.
-        """
-        min_cache_duration, max_cache_duration, ko_threshold = [
-            current_app.config.get(k)
-            for k in (
-                "LINKCHECKING_MIN_CACHE_DURATION",
-                "LINKCHECKING_MAX_CACHE_DURATION",
-                "LINKCHECKING_UNAVAILABLE_THRESHOLD",
-            )
-        ]
-        count_availability = self.extras.get("check:count-availability", 1)
-        is_available = self.check_availability()
-        if is_available == "unknown":
-            return True
-        elif is_available or count_availability > ko_threshold:
-            delta = min(min_cache_duration * count_availability, max_cache_duration)
-        else:
-            delta = min_cache_duration
-        if self.extras.get("check:date"):
-            limit_date = datetime.utcnow() - timedelta(minutes=delta)
-            check_date = self.extras["check:date"]
-            if not isinstance(check_date, datetime):
-                try:
-                    check_date = parse_dt(check_date)
-                except (ValueError, TypeError):
-                    return True
-            if check_date >= limit_date:
-                return False
-        return True
-
     @property
     def latest(self):
         """
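Context for the hunk above: the `check:available`, `check:status` and `check:date` extras were previously attached to `ResourceMixin.extras` by the now-deleted udata/linkchecker/models.py through `register()` calls; this release declares them inline when the field is constructed. A minimal standalone sketch of the idea in plain Python (the `TypedExtras` class below is purely illustrative, not udata or MongoEngine code):

# Illustrative stand-in for a typed "extras" mapping: keys declared up front
# are validated against their expected type when a value is assigned.
class TypedExtras(dict):
    def __init__(self, keys_types=None):
        super().__init__()
        self.registered = dict(keys_types or {})  # key -> expected type

    def __setitem__(self, key, value):
        expected = self.registered.get(key)
        if expected is not None and not isinstance(value, expected):
            raise TypeError(f"{key!r} expects {expected.__name__}, got {type(value).__name__}")
        super().__setitem__(key, value)


extras = TypedExtras({"check:available": bool, "check:status": int})
extras["check:available"] = True  # accepted
extras["check:status"] = 200      # accepted
# extras["check:status"] = "200"  # would raise TypeError

This mirrors, very roughly, the constraint the real ExtrasField applies to declared keys through MongoEngine field validation.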
udata/entrypoints.py
CHANGED
@@ -4,7 +4,6 @@ import pkg_resources
 ENTRYPOINTS = {
     "udata.avatars": "Avatar rendering backends",
     "udata.harvesters": "Harvest backends",
-    "udata.linkcheckers": "Link checker backends",
     "udata.metrics": "Extra metrics",
     "udata.models": "Models and migrations",
     "udata.plugins": "Generic plugin",
udata/models/__init__.py
CHANGED
@@ -31,8 +31,6 @@ from udata.features.territories.models import * # noqa
 # Load HarvestSource model as harvest for catalog
 from udata.harvest.models import HarvestSource as Harvest # noqa

-import udata.linkchecker.models # noqa
-

 def init_app(app):
     entrypoints.get_enabled("udata.models", app)
udata/mongo/extras_fields.py
CHANGED
@@ -11,15 +11,16 @@ ALLOWED_TYPES = (str, int, float, bool, datetime, date, list, dict)


 class ExtrasField(DictField):
-    def __init__(self, **kwargs):
+    def __init__(self, keys_types={}, **kwargs):
         self.registered = {}
+        for key, dbtype in keys_types.items():
+            self.register(key, dbtype)
         super(ExtrasField, self).__init__()

     def register(self, key, dbtype):
         """Register a DB type to add constraint on a given extra key"""
         if not issubclass(dbtype, (BaseField, EmbeddedDocument)):
-
-            raise TypeError(msg)
+            raise TypeError("ExtrasField can only register MongoEngine fields")
         self.registered[key] = dbtype

     def validate(self, values):
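The hunk above lets constrained keys be declared when ExtrasField is built, instead of calling `register()` on the instance afterwards. A short sketch of the two now-equivalent call styles, using a hypothetical stand-in class rather than the MongoEngine-backed ExtrasField:

class ExtrasFieldSketch:
    """Hypothetical stand-in mirroring the new constructor behaviour."""

    def __init__(self, keys_types={}, **kwargs):  # default mirrors the diff above
        self.registered = {}
        for key, dbtype in keys_types.items():
            self.register(key, dbtype)

    def register(self, key, dbtype):
        # The real field only accepts MongoEngine field classes; plain types stand in here.
        if not isinstance(dbtype, type):
            raise TypeError("ExtrasFieldSketch can only register types")
        self.registered[key] = dbtype


# New style: declare constrained keys at construction time...
inline = ExtrasFieldSketch({"check:available": bool, "check:status": int})

# ...equivalent to the older register-after-construction style.
late = ExtrasFieldSketch()
late.register("check:available", bool)
late.register("check:status", int)

assert inline.registered == late.registered

This is consistent with dropping the separate udata/linkchecker/models.py registration module while keeping the `check:*` keys declared directly on the resource model.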
udata/settings.py
CHANGED
@@ -331,16 +331,6 @@ class Defaults(object):
     # The order is important to compute parents/children, smaller first.
     HANDLED_LEVELS = tuple()

-    LINKCHECKING_ENABLED = True
-    # Resource types ignored by linkchecker
-    LINKCHECKING_UNCHECKED_TYPES = ("api",)
-    LINKCHECKING_IGNORE_DOMAINS = []
-    LINKCHECKING_IGNORE_PATTERNS = ["format=shp"]
-    LINKCHECKING_MIN_CACHE_DURATION = 60 # in minutes
-    LINKCHECKING_MAX_CACHE_DURATION = 1080 # in minutes (1 week)
-    LINKCHECKING_UNAVAILABLE_THRESHOLD = 100
-    LINKCHECKING_DEFAULT_LINKCHECKER = "no_check"
-
     # Ignore some endpoint from API tracking
     # By default ignore the 3 most called APIs
     TRACKING_BLACKLIST = [
{udata-12.0.2.dev10.dist-info → udata-12.0.2.dev12.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: udata
-Version: 12.0.2.
+Version: 12.0.2.dev12
 Summary: Open data portal
 Author-email: Opendata Team <opendatateam@data.gouv.fr>
 Maintainer-email: Opendata Team <opendatateam@data.gouv.fr>
@@ -96,8 +96,6 @@ Requires-Dist: invoke<3.0,>=2.2.0; extra == "dev"
 Requires-Dist: pip-tools<8.0,>=7.4.1; extra == "dev"
 Requires-Dist: pre-commit<5.0,>=4.2.0; extra == "dev"
 Requires-Dist: ruff<1.0,>=0.11.0; extra == "dev"
-Requires-Dist: twine<7.0,>=6.1.0; extra == "dev"
-Requires-Dist: wheel<1.0,>=0.45.1; extra == "dev"
 Provides-Extra: test
 Requires-Dist: faker<38.0,>=37.0.2; extra == "test"
 Requires-Dist: feedparser<7.0,>=6.0.11; extra == "test"
{udata-12.0.2.dev10.dist-info → udata-12.0.2.dev12.dist-info}/RECORD
CHANGED
@@ -2,7 +2,7 @@ udata/__init__.py,sha256=U0HEYqKCLOY43O1UCVeuAb3b3SSX1pPhsJGpHJmK67k,75
 udata/api_fields.py,sha256=XI0XoM1fxO4DEzxGptOAB5SL_fJr-u58-bfQVAvzgBg,36549
 udata/app.py,sha256=By-eZvSVSCNtWeKm_lA8TF81qaHdzScvggvbgHCDHYI,8992
 udata/cors.py,sha256=JttAogsNVSFWEV9-1L2kdbwwsNewn3KjgBErXReNwfc,3801
-udata/entrypoints.py,sha256=
+udata/entrypoints.py,sha256=90n21thjq-r7EEyWf6IJZ6DS1q404OoP_VDkpRjZMvs,2611
 udata/errors.py,sha256=E8W7b4PH7c5B85g_nsUMt8fHqMVpDFOZFkO6wMPl6bA,117
 udata/factories.py,sha256=MoklZnU8iwNL25dm3JsoXhoQs1PQWSVYL1WvcUBtJqM,492
 udata/i18n.py,sha256=bC9ajf66YgcYoJffvresLZLa32rb6NsY-JGMtFiVsG4,8163
@@ -10,7 +10,7 @@ udata/mail.py,sha256=Huhx_1QthJkLvuRUuP6jqb5Qq5R4iSmqeEpLVO9ZkQ4,2671
 udata/rdf.py,sha256=aJKmnE1r6YyMKXLo-VRlUvOXoZSJuvNeNbMCqEl0kdY,19370
 udata/routing.py,sha256=Hnc1ktmKVS-RUHNKw2zYTft2HJ903FhjtlcenQ9igwI,8044
 udata/sentry.py,sha256=ekcxqUSqxfM98TtvCsPaOoX5i2l6PEcYt7kb4l3od-Q,3223
-udata/settings.py,sha256=
+udata/settings.py,sha256=1gDu1fnorsgzRzkMIEzFjWiVZ_7UPe1kRW-GQulb7O0,21686
 udata/sitemap.py,sha256=oRRWoPI7ZsFFnUAOqGT1YuXFFKHBe8EcRnUCNHD7xjM,979
 udata/tags.py,sha256=ydq4uokd6bzdeGVSpEXASVtGvDfO2LfQs9mptvvKJCM,631
 udata/tasks.py,sha256=Sv01dhvATtq_oHOBp3J1j1VT1HQe0Pab7zxwIeIdKoo,5122
@@ -34,7 +34,7 @@ udata/auth/mails.py,sha256=ggGfgYEgNLtF-p5HocrmuQAk0b6fteWtN4UzK3ZvMlA,1759
 udata/auth/password_validation.py,sha256=ODVdEsiXbtq_8ws4Yf3hs5Sq7jz-IDa1RxAm_WPIJnA,1806
 udata/auth/proconnect.py,sha256=hsvQ71Hqy42NvwgYtcMniRXWax3Q7LX1INcmaS7gaIQ,5073
 udata/auth/views.py,sha256=83nlLQiRlqLPILPOkKilNnrTJTs6c7XKqe-nt2I5l8s,7861
-udata/commands/__init__.py,sha256=
+udata/commands/__init__.py,sha256=y0ncKCjOTuxm9Tn8A0WMvub1Mai0wLQ15dq9y8GRV7c,7692
 udata/commands/cache.py,sha256=bLdrf_fCWFYX9ULlL2ADsZRwijkI4pArsJxfx24OivM,341
 udata/commands/db.py,sha256=OyVBcuSIqYqNywlZAi19F2yRJCAIdFKKyQ9H9alqbfI,20426
 udata/commands/dcat.py,sha256=f6jT2AGZem-w1CaRH_ahfWB9A4oCDvjG13tPmBpeCqw,3910
@@ -92,7 +92,7 @@ udata/core/dataservices/tasks.py,sha256=fHG1r5ymfJRXJ_Lug6je3VKZoK30XKXE2rQ8x0R-
 udata/core/dataset/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 udata/core/dataset/actions.py,sha256=mX6xox0PiMrbcAPZ3VZsI26rfM-ciYfEXxN6sqqImKA,1222
 udata/core/dataset/activities.py,sha256=eGxMUnC47YHxTgcls6igQ3qP7cYgwFtPfj0asCylGsI,3315
-udata/core/dataset/api.py,sha256=
+udata/core/dataset/api.py,sha256=JBdYH2RXac1WVOF67ZzBSvImVxixMfN0rxEknnfthRU,35338
 udata/core/dataset/api_fields.py,sha256=p7ZnmGNImZ4sgZTpoyHpI0CgOukpEIx8QdGnxlmgl2I,18032
 udata/core/dataset/apiv2.py,sha256=1H4557ZMi6rwEyrwB1Ha20m0bf3Avhg_vDLiDQt5Fi0,21030
 udata/core/dataset/commands.py,sha256=3mKSdJ-M7ggdG29AVn77C4ouZanbYoqkTaGQoBKOp3s,3471
@@ -103,7 +103,7 @@ udata/core/dataset/exceptions.py,sha256=uKiayLSpSzsnLvClObS6hOO0qXEqvURKN7_w8eim
 udata/core/dataset/factories.py,sha256=tb18axsk8Tx5iUIqWM9IELdt-2Ryp2UN0-iY4fdea4U,9059
 udata/core/dataset/forms.py,sha256=gGXOqy3WXFNvWMXngDq3TEqMM18-9DBpy2V7msrOsTw,7084
 udata/core/dataset/metrics.py,sha256=s8Xs_rqRXfNWsErkiJTuRMG5o_cU5iSK8mUJFKVSc7w,1204
-udata/core/dataset/models.py,sha256=
+udata/core/dataset/models.py,sha256=1iXf6l0wjNv04if6gAq-NXSPLub2Mx2KOyCg2RQtids,41617
 udata/core/dataset/permissions.py,sha256=zXQ6kU-Ni3Pl5tDtat-ZPupug9InsNeCN7xRLc2Vcrc,1097
 udata/core/dataset/preview.py,sha256=uFEpK-p5nIAlY8hVOMhd7mtkwFt6C_PQRMNxPvAyoo4,839
 udata/core/dataset/rdf.py,sha256=jHyHgay3g3Z04Ju5kLyMk3Vyb-kWJYtqqg8Smamv7WI,32970
@@ -356,12 +356,6 @@ udata/harvest/tests/dcat/partial-collection-1.jsonld,sha256=emPZGvpdaqgIVTgtmlWN
 udata/harvest/tests/dcat/partial-collection-2.jsonld,sha256=zJ1ggcs2b4IJBDJ6zKJn8w5arjJBU_EHr6qFd8tu0I8,3691
 udata/harvest/tests/dcat/sig.oreme.rdf,sha256=6F1P-hPyE8bZC-5uQTskTawRd19U0opIy9LCDwp0sd4,6315
 udata/harvest/tests/dcat/udata.xml,sha256=co7tLKinEdPOwEScHUXSqSAKAFSgLElYOCI3uu50Sgo,14532
-udata/linkchecker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-udata/linkchecker/backends.py,sha256=Xe_nWwjKsdMv7kgs3mbQDppVO4lqaJlboUxa_0nk1XM,1018
-udata/linkchecker/checker.py,sha256=LB6WSMp5yvdmfdlq2ygp7uHoYEVsK0IPk6qnmhrS4GI,2903
-udata/linkchecker/commands.py,sha256=14c78P0FTLncuJpAgPizoCY62bUIknKumev1FPZ4JQU,454
-udata/linkchecker/models.py,sha256=V5PBSOGnCXZrZLoFQwKb91enpgVTbX4Q3_oWOPihJic,423
-udata/linkchecker/tasks.py,sha256=jETwfn6v-mP4jh_ovz4EwlxBUcRij_wHJVK1AmC0qo4,1918
 udata/migrations/2020-07-24-remove-s-from-scope-oauth.py,sha256=bCfBRcd4azHSV-fpZQaiHkWu7likHoWNpq9PeJvi5S0,665
 udata/migrations/2020-08-24-add-fs-filename.py,sha256=7mz5ufFvY67NDKEmQn0HS3u1SdR9uH077ngJfd2uot4,1819
 udata/migrations/2020-09-28-update-reuses-datasets-metrics.py,sha256=d7zizz1fwy-jCOJWmfveBpBZJSHJu46HSIBf3Xnd0eI,407
@@ -404,13 +398,13 @@ udata/migrations/2025-07-30-purge-old-harvest-dynamic-fields.py,sha256=ijeu6WvX6
 udata/migrations/2025-09-04-update-legacy-frequencies.py,sha256=8YAROAHhytf6Kses_54aFscmPNes2aHRYqTwNpawdVk,1693
 udata/migrations/2025-10-01-delete-orphaned-topic-elements.py,sha256=Mhx5ANOihZL4botxtjvfsll-xKBtkVQBkPcSq0BJ-Ec,788
 udata/migrations/__init__.py,sha256=RBCBDaTlLjuMs_Qzwji6Z6T4r7FCGXhESKoxQbT5qAA,11221
-udata/models/__init__.py,sha256=
+udata/models/__init__.py,sha256=RT0WaPKm2linlYnTmD0xck7M2XuIoIJ94E0ErzNGzaw,1418
 udata/mongo/__init__.py,sha256=y4Rv-kq3o_kcEulcNpePLzocXPBNpx3Jd82G-VZPaMc,1421
 udata/mongo/datetime_fields.py,sha256=xACagQZu1OKPvpcznI-bMC1tJfAvo-VBUe7OOadnBdg,2089
 udata/mongo/document.py,sha256=yJl4rzE0L69SvNbtmnmyCALTGhXwBPrj7nvM-J6sDpE,1792
 udata/mongo/engine.py,sha256=JF9N55j7joDIn9NrItMtlIrA5CwVLhS_jlB2ptX94oA,2408
 udata/mongo/errors.py,sha256=SpTMAc_aNIfGkqyXGCbTlIAmYxU86rGM_NtIYaB642c,472
-udata/mongo/extras_fields.py,sha256=
+udata/mongo/extras_fields.py,sha256=knb0fwt8eIEOA0jjeUAs9Gmn_cfUNVPuRdGEVcJzE2Y,4218
 udata/mongo/queryset.py,sha256=fXfYkUHsCWAUoub3OR7v825USPv-PQQIHkv4U5FnjYg,3954
 udata/mongo/slug_fields.py,sha256=tEUlwozrdQfF42KR5dxk5PUNSX7zISTIXsSgHxR4YMg,7522
 udata/mongo/taglist_field.py,sha256=RPi8DlgMEMK1wk8hbQDLAyH2GnzZCfNpWXQsllxPB6g,1371
@@ -504,7 +498,6 @@ udata/tests/test_api_fields.py,sha256=NCUTtOMEaTM5-tK-YUxhjEud2IPIDOHR3vbZWAQdEC
 udata/tests/test_cors.py,sha256=i1SQS91lm-i3YEUqoHKUFpOI7TCpDx89MzHoWd3r2uk,2932
 udata/tests/test_dcat_commands.py,sha256=fDAnAjkja8AXw_qzaAWnVTgglkBAvK2mjPMHUCtqrrU,919
 udata/tests/test_discussions.py,sha256=a2yBGfOSm93w8zP2s2gXy51LGniiZ0lbrej0uuXNd2E,47685
-udata/tests/test_linkchecker.py,sha256=W8jrwKYXM8wWXZFjiaBwpWGRBhZ8bsSHGHzL9voDN7U,10218
 udata/tests/test_mail.py,sha256=f-8meP9r1Xrz0eOTsvmdynoV9OFHLwwMr7XM5WWv_gk,4182
 udata/tests/test_migrations.py,sha256=Iq0gt3hjK6ohliIvEpqyt0tYJz0MKIzBg_yfw7dOcHo,15698
 udata/tests/test_model.py,sha256=mwWf8tbh4iFVzqIpTvpobjfCQOdo8EJ5x3t8mXHmd7g,21999
@@ -632,9 +625,9 @@ udata/translations/pt/LC_MESSAGES/udata.mo,sha256=nv80xZLfIfUsSOMBcr29L268FDc_Gt
 udata/translations/pt/LC_MESSAGES/udata.po,sha256=bUp-7Ray8t8ALgJk3Icw1jmiGIc9_pEJQHiGw_2EU2o,50989
 udata/translations/sr/LC_MESSAGES/udata.mo,sha256=Y_XpUxD074wXc63oJTnoVOyOQ2lmBxl-MrgluZ0Qdw4,27961
 udata/translations/sr/LC_MESSAGES/udata.po,sha256=qh8mrz9AFuVQtXYSSP4QWsXLM_Lv3EHVifHT1NflWXY,57529
-udata-12.0.2.
-udata-12.0.2.
-udata-12.0.2.
-udata-12.0.2.
-udata-12.0.2.
-udata-12.0.2.
+udata-12.0.2.dev12.dist-info/licenses/LICENSE,sha256=V8j_M8nAz8PvAOZQocyRDX7keai8UJ9skgmnwqETmdY,34520
+udata-12.0.2.dev12.dist-info/METADATA,sha256=Q5Wk0TmHufgkuNdrCwlb4ROur_K-z2warRq0MeXgAgk,5174
+udata-12.0.2.dev12.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+udata-12.0.2.dev12.dist-info/entry_points.txt,sha256=v2u12qO11i2lyLNIp136WmLJ-NHT-Kew3Duu8J-AXPM,614
+udata-12.0.2.dev12.dist-info/top_level.txt,sha256=EF6CE6YSHd_og-8LCEA4q25ALUpWVe8D0okOLdMAE3A,6
+udata-12.0.2.dev12.dist-info/RECORD,,
udata/linkchecker/__init__.py
DELETED
File without changes
udata/linkchecker/backends.py
DELETED
@@ -1,31 +0,0 @@
-import logging
-from datetime import datetime
-
-from flask import current_app
-
-from udata.entrypoints import get_enabled
-
-log = logging.getLogger(__name__)
-
-
-ENTRYPOINT = "udata.linkcheckers"
-
-
-class NoCheckLinkchecker(object):
-    """Dummy linkchecker for resources that need no check"""
-
-    def check(self, _):
-        return {"check:status": 204, "check:available": True, "check:date": datetime.utcnow()}
-
-
-def get(name):
-    """Get a linkchecker given its name or fallback on default"""
-    linkcheckers = get_enabled(ENTRYPOINT, current_app)
-    linkcheckers.update(no_check=NoCheckLinkchecker) # no_check always enabled
-    selected_linkchecker = linkcheckers.get(name)
-    if not selected_linkchecker:
-        default_linkchecker = current_app.config.get("LINKCHECKING_DEFAULT_LINKCHECKER")
-        selected_linkchecker = linkcheckers.get(default_linkchecker)
-        if not selected_linkchecker:
-            log.error("No linkchecker found ({} requested and no fallback)".format(name))
-    return selected_linkchecker
udata/linkchecker/checker.py
DELETED
@@ -1,75 +0,0 @@
-from urllib.parse import urlparse
-
-from flask import current_app
-
-from .backends import NoCheckLinkchecker
-from .backends import get as get_linkchecker
-
-
-def _get_check_keys(the_dict, resource, previous_status):
-    check_keys = {k: v for k, v in the_dict.items() if k.startswith("check:")}
-    check_keys["check:count-availability"] = _compute_count_availability(
-        resource, check_keys.get("check:available"), previous_status
-    )
-    return check_keys
-
-
-def _compute_count_availability(resource, status, previous_status):
-    """Compute the `check:count-availability` extra value"""
-    count_availability = resource.extras.get("check:count-availability", 1)
-    return count_availability + 1 if status == previous_status else 1
-
-
-def is_ignored(resource):
-    """Check if the resource's URL is to be ignored"""
-    ignored_domains = current_app.config["LINKCHECKING_IGNORE_DOMAINS"]
-    ignored_patterns = current_app.config["LINKCHECKING_IGNORE_PATTERNS"]
-    url = resource.url
-    if not url:
-        return True
-    parsed_url = urlparse(url)
-    ignored_domains_match = parsed_url.netloc in ignored_domains
-    ignored_patterns_match = any([p in url for p in ignored_patterns])
-    return ignored_domains_match or ignored_patterns_match
-
-
-def dummy_check_response():
-    """Trigger a dummy check"""
-    return NoCheckLinkchecker().check(None)
-
-
-def check_resource(resource):
-    """
-    Check a resource availability against a linkchecker backend
-
-    The linkchecker used can be configured on a resource basis by setting
-    the `resource.extras['check:checker']` attribute with a key that points
-    to a valid `udata.linkcheckers` entrypoint. If not set, it will
-    fallback on the default linkchecker defined by the configuration variable
-    `LINKCHECKING_DEFAULT_LINKCHECKER`.
-
-    Returns
-    -------
-    dict or (dict, int)
-        Check results dict and status code (if error).
-    """
-    linkchecker_type = resource.extras.get("check:checker")
-    LinkChecker = get_linkchecker(linkchecker_type)
-    if not LinkChecker:
-        return {"error": "No linkchecker configured."}, 503
-    if is_ignored(resource):
-        return dummy_check_response()
-    result = LinkChecker().check(resource)
-    if not result:
-        return {"error": "No response from linkchecker"}, 503
-    elif result.get("check:error"):
-        return {"error": result["check:error"]}, 500
-    elif not result.get("check:status"):
-        return {"error": "No status in response from linkchecker"}, 503
-    # store the check result in the resource's extras
-    # XXX maybe this logic should be in the `Resource` model?
-    previous_status = resource.extras.get("check:available")
-    check_keys = _get_check_keys(result, resource, previous_status)
-    resource.extras.update(check_keys)
-    resource.save(signal_kwargs={"ignores": ["post_save"]}) # Prevent signal triggering on dataset
-    return result
udata/linkchecker/commands.py
DELETED
@@ -1,21 +0,0 @@
-import logging
-
-import click
-
-from udata.commands import cli
-from udata.linkchecker.tasks import check_resources
-
-log = logging.getLogger(__name__)
-
-
-@cli.group("linkchecker")
-def grp():
-    """Link checking operations"""
-    pass
-
-
-@grp.command()
-@click.option("-n", "--number", type=int, default=5000, help="Number of URLs to check")
-def check(number):
-    """Check <number> of URLs that have not been (recently) checked"""
-    check_resources(number)
udata/linkchecker/models.py
DELETED
@@ -1,9 +0,0 @@
-from udata.core.dataset.models import ResourceMixin
-from udata.mongo import db
-
-# Register harvest extras
-ResourceMixin.extras.register("check:available", db.BooleanField)
-ResourceMixin.extras.register("check:count-availability", db.IntField)
-ResourceMixin.extras.register("check:status", db.IntField)
-ResourceMixin.extras.register("check:url", db.StringField)
-ResourceMixin.extras.register("check:date", db.DateTimeField)
udata/linkchecker/tasks.py
DELETED
@@ -1,55 +0,0 @@
-import logging
-import uuid
-
-from flask import current_app
-
-from udata.models import Dataset
-from udata.tasks import job
-from udata.utils import get_by
-
-from .checker import check_resource
-
-log = logging.getLogger(__name__)
-
-
-@job("check_resources")
-def check_resources(self, number):
-    """Check <number> of URLs that have not been (recently) checked"""
-    if not current_app.config.get("LINKCHECKING_ENABLED"):
-        log.error("Link checking is disabled.")
-        return
-
-    base_pipeline = [
-        {"$match": {"resources": {"$gt": []}}},
-        {"$project": {"resources._id": True, "resources.extras.check:date": True}},
-        {"$unwind": "$resources"},
-    ]
-    # unchecked resources
-    pipeline = base_pipeline + [
-        {"$match": {"resources.extras.check:date": {"$eq": None}}},
-        {"$limit": number},
-    ]
-    resources = list(Dataset.objects.aggregate(*pipeline))
-    # not recently checked resources
-    slots_left = number - len(resources)
-    if slots_left:
-        pipeline = base_pipeline + [
-            {"$match": {"resources.extras.check:date": {"$ne": None}}},
-            {"$sort": {"resources.extras.check:date": 1}},
-            {"$limit": slots_left},
-        ]
-        resources += list(Dataset.objects.aggregate(*pipeline))
-
-    nb_resources = len(resources)
-    log.info("Checking %s resources...", nb_resources)
-    for idx, dataset_resource in enumerate(resources):
-        dataset_obj = Dataset.objects.get(id=dataset_resource["_id"])
-        resource_id = dataset_resource["resources"]["_id"]
-        rid = uuid.UUID(resource_id)
-        resource_obj = get_by(dataset_obj.resources, "id", rid)
-        log.info("Checking resource %s (%s/%s)", resource_id, idx + 1, nb_resources)
-        if resource_obj.need_check():
-            check_resource(resource_obj)
-        else:
-            log.info("--> Skipping this resource, cache is fresh enough.")
-    log.info("Done.")
udata/tests/test_linkchecker.py
DELETED
@@ -1,277 +0,0 @@
-from datetime import datetime, timedelta
-
-import mock
-import pytest
-
-from udata.auth import login_user
-from udata.core.activity import init_app as init_activity
-from udata.core.activity.models import Activity
-from udata.core.dataset.factories import DatasetFactory, ResourceFactory
-from udata.core.user.factories import UserFactory
-from udata.linkchecker.checker import check_resource
-from udata.settings import Testing
-from udata.tests import TestCase
-
-
-class LinkcheckerTestSettings(Testing):
-    LINKCHECKING_ENABLED = True
-    LINKCHECKING_IGNORE_DOMAINS = ["example-ignore.com"]
-    LINKCHECKING_IGNORE_PATTERNS = ["format=shp"]
-    LINKCHECKING_MIN_CACHE_DURATION = 0.5
-    LINKCHECKING_UNAVAILABLE_THRESHOLD = 100
-    LINKCHECKING_MAX_CACHE_DURATION = 100
-
-
-@pytest.fixture
-def activity_app(app):
-    init_activity(app)
-    yield app
-
-
-def test_check_resource_creates_no_activity(activity_app, mocker):
-    resource = ResourceFactory()
-    dataset = DatasetFactory(resources=[resource])
-    user = UserFactory()
-    login_user(user)
-    check_res = {"check:status": 200, "check:available": True, "check:date": datetime.utcnow()}
-
-    class DummyLinkchecker:
-        def check(self, _):
-            return check_res
-
-    mocker.patch("udata.linkchecker.checker.get_linkchecker", return_value=DummyLinkchecker)
-
-    check_resource(resource)
-
-    activities = Activity.objects.filter(related_to=dataset)
-    assert len(activities) == 0
-
-
-class LinkcheckerTest(TestCase):
-    settings = LinkcheckerTestSettings
-
-    def setUp(self):
-        self.resource = ResourceFactory()
-        self.dataset = DatasetFactory(resources=[self.resource])
-
-    @mock.patch("udata.linkchecker.checker.get_linkchecker")
-    def test_check_resource_no_linkchecker(self, mock_fn):
-        mock_fn.return_value = None
-        res = check_resource(self.resource)
-        self.assertEqual(res, ({"error": "No linkchecker configured."}, 503))
-
-    @mock.patch("udata.linkchecker.checker.get_linkchecker")
-    def test_check_resource_linkchecker_ok(self, mock_fn):
-        check_res = {"check:status": 200, "check:available": True, "check:date": datetime.utcnow()}
-
-        class DummyLinkchecker:
-            def check(self, _):
-                return check_res
-
-        mock_fn.return_value = DummyLinkchecker
-
-        res = check_resource(self.resource)
-        self.assertEqual(res, check_res)
-        check_res.update({"check:count-availability": 1})
-        self.assertEqual(self.resource.extras, check_res)
-
-    @mock.patch("udata.linkchecker.checker.get_linkchecker")
-    def test_check_resource_filter_result(self, mock_fn):
-        check_res = {"check:status": 200, "dummy": "dummy"}
-
-        class DummyLinkchecker:
-            def check(self, _):
-                return check_res
-
-        mock_fn.return_value = DummyLinkchecker
-
-        res = check_resource(self.resource)
-        self.assertEqual(res, check_res)
-        self.assertNotIn("dummy", self.resource.extras)
-
-    @mock.patch("udata.linkchecker.checker.get_linkchecker")
-    def test_check_resource_linkchecker_no_status(self, mock_fn):
-        class DummyLinkchecker:
-            def check(self, _):
-                return {"check:available": True}
-
-        mock_fn.return_value = DummyLinkchecker
-        res = check_resource(self.resource)
-        self.assertEqual(res, ({"error": "No status in response from linkchecker"}, 503))
-
-    @mock.patch("udata.linkchecker.checker.get_linkchecker")
-    def test_check_resource_linkchecker_check_error(self, mock_fn):
-        class DummyLinkchecker:
-            def check(self, _):
-                return {"check:error": "ERROR"}
-
-        mock_fn.return_value = DummyLinkchecker
-        res = check_resource(self.resource)
-        self.assertEqual(res, ({"error": "ERROR"}, 500))
-
-    @mock.patch("udata.linkchecker.checker.get_linkchecker")
-    def test_check_resource_linkchecker_in_resource(self, mock_fn):
-        self.resource.extras["check:checker"] = "another_linkchecker"
-        self.resource.save()
-        check_resource(self.resource)
-        args, kwargs = mock_fn.call_args
-        self.assertEqual(args, ("another_linkchecker",))
-
-    def test_check_resource_linkchecker_no_check(self):
-        self.resource.extras["check:checker"] = "no_check"
-        self.resource.save()
-        res = check_resource(self.resource)
-        self.assertEqual(res.get("check:status"), 204)
-        self.assertEqual(res.get("check:available"), True)
-
-    def test_check_resource_ignored_domain(self):
-        self.resource.extras = {}
-        self.resource.url = "http://example-ignore.com/url"
-        self.resource.save()
-        res = check_resource(self.resource)
-        self.assertEqual(res.get("check:status"), 204)
-        self.assertEqual(res.get("check:available"), True)
-
-    def test_check_resource_ignored_pattern(self):
-        self.resource.extras = {}
-        self.resource.url = "http://example.com/url?format=shp"
-        self.resource.save()
-        res = check_resource(self.resource)
-        self.assertEqual(res.get("check:status"), 204)
-        self.assertEqual(res.get("check:available"), True)
-
-    def test_is_need_check(self):
-        self.resource.extras = {
-            "check:available": True,
-            "check:date": datetime.utcnow(),
-            "check:status": 42,
-        }
-        self.assertFalse(self.resource.need_check())
-
-    def test_is_need_check_unknown_status(self):
-        self.resource.extras = {}
-        self.assertTrue(self.resource.need_check())
-
-    def test_is_need_check_cache_expired(self):
-        self.resource.extras = {
-            "check:available": True,
-            "check:date": datetime.utcnow() - timedelta(seconds=3600),
-            "check:status": 42,
-        }
-        self.assertTrue(self.resource.need_check())
-
-    def test_is_need_check_date_string(self):
-        check_date = (datetime.utcnow() - timedelta(seconds=3600)).isoformat()
-        self.resource.extras = {
-            "check:available": True,
-            "check:date": check_date,
-            "check:status": 42,
-        }
-        self.assertTrue(self.resource.need_check())
-
-    def test_is_need_check_wrong_check_date(self):
-        check_date = "123azerty"
-        self.resource.extras = {
-            "check:available": True,
-            "check:date": check_date,
-            "check:status": 42,
-        }
-        self.assertTrue(self.resource.need_check())
-
-    def test_is_need_check_wrong_check_date_int(self):
-        check_date = 42
-        self.resource.extras = {
-            "check:available": True,
-            "check:date": check_date,
-            "check:status": 42,
-        }
-        self.assertTrue(self.resource.need_check())
-
-    def test_is_need_check_count_availability(self):
-        self.resource.extras = {
-            # should need a new check after 100 * 30s = 3000s < 3600s
-            "check:count-availability": 100,
-            "check:available": True,
-            "check:date": datetime.utcnow() - timedelta(seconds=3600),
-            "check:status": 42,
-        }
-        self.assertTrue(self.resource.need_check())
-
-    def test_is_need_check_count_availability_expired(self):
-        self.resource.extras = {
-            # should need a new check after 150 * 30s = 4500s > 3600s
-            "check:count-availability": 150,
-            "check:available": True,
-            "check:date": datetime.utcnow() - timedelta(seconds=3600),
-            "check:status": 42,
-        }
-        self.assertFalse(self.resource.need_check())
-
-    def test_is_need_check_count_availability_unavailable(self):
-        self.resource.extras = {
-            # should need a new check after 30s < 3600S
-            # count-availability is below threshold
-            "check:count-availability": 95,
-            "check:available": False,
-            "check:date": datetime.utcnow() - timedelta(seconds=3600),
-            "check:status": 42,
-        }
-        self.assertTrue(self.resource.need_check())
-
-    @mock.patch("udata.linkchecker.checker.get_linkchecker")
-    def test_count_availability_increment(self, mock_fn):
-        check_res = {"check:status": 200, "check:available": True, "check:date": datetime.utcnow()}
-
-        class DummyLinkchecker:
-            def check(self, _):
-                return check_res
-
-        mock_fn.return_value = DummyLinkchecker
-
-        check_resource(self.resource)
-        self.assertEqual(self.resource.extras["check:count-availability"], 1)
-
-        check_resource(self.resource)
-        self.assertEqual(self.resource.extras["check:count-availability"], 2)
-
-    @mock.patch("udata.linkchecker.checker.get_linkchecker")
-    def test_count_availability_reset(self, mock_fn):
-        self.resource.extras = {
-            "check:status": 200,
-            "check:available": True,
-            "check:date": datetime.utcnow(),
-            "check:count-availability": 2,
-        }
-        check_res = {"check:status": 200, "check:available": False, "check:date": datetime.utcnow()}
-
-        class DummyLinkchecker:
-            def check(self, _):
-                return check_res
-
-        mock_fn.return_value = DummyLinkchecker
-
-        check_resource(self.resource)
-        self.assertEqual(self.resource.extras["check:count-availability"], 1)
-
-    def test_count_availability_threshold(self):
-        self.resource.extras = {
-            "check:status": 404,
-            "check:available": False,
-            # if it weren't above threshold, should need check (>30s)
-            # and we're still below max_cache 101 * 0.5 < 100
-            "check:date": datetime.utcnow() - timedelta(seconds=60),
-            "check:count-availability": 101,
-        }
-        self.assertFalse(self.resource.need_check())
-
-    def test_count_availability_max_cache_duration(self):
-        self.resource.extras = {
-            "check:status": 200,
-            "check:available": True,
-            # next check should be at 300 * 0.5 = 150min
-            # but we are above max cache duration 150min > 100min
-            # and 120m > 100 min so we should need a new check
-            "check:date": datetime.utcnow() - timedelta(minutes=120),
-            "check:count-availability": 300,
-        }
-        self.assertTrue(self.resource.need_check())
{udata-12.0.2.dev10.dist-info → udata-12.0.2.dev12.dist-info}/WHEEL
File without changes
{udata-12.0.2.dev10.dist-info → udata-12.0.2.dev12.dist-info}/entry_points.txt
File without changes
{udata-12.0.2.dev10.dist-info → udata-12.0.2.dev12.dist-info}/licenses/LICENSE
File without changes
{udata-12.0.2.dev10.dist-info → udata-12.0.2.dev12.dist-info}/top_level.txt
File without changes