udata 10.1.3.dev34251__py2.py3-none-any.whl → 10.1.3.dev34283__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- udata/commands/fixtures.py +1 -1
- udata/core/dataservices/constants.py +11 -0
- udata/core/dataservices/csv.py +3 -3
- udata/core/dataservices/models.py +16 -7
- udata/core/dataservices/rdf.py +5 -3
- udata/core/dataservices/search.py +11 -2
- udata/harvest/actions.py +5 -0
- udata/harvest/backends/base.py +22 -2
- udata/harvest/models.py +19 -0
- udata/harvest/tests/test_actions.py +12 -0
- udata/harvest/tests/test_base_backend.py +74 -8
- udata/harvest/tests/test_dcat_backend.py +1 -1
- udata/migrations/2025-01-05-dataservices-fields-changes.py +136 -0
- udata/static/chunks/{10.471164b2a9fe15614797.js → 10.8ca60413647062717b1e.js} +3 -3
- udata/static/chunks/{10.471164b2a9fe15614797.js.map → 10.8ca60413647062717b1e.js.map} +1 -1
- udata/static/chunks/{11.51d706fb9521c16976bc.js → 11.b6f741fcc366abfad9c4.js} +3 -3
- udata/static/chunks/{11.51d706fb9521c16976bc.js.map → 11.b6f741fcc366abfad9c4.js.map} +1 -1
- udata/static/chunks/{13.f29411b06be1883356a3.js → 13.2d06442dd9a05d9777b5.js} +2 -2
- udata/static/chunks/{13.f29411b06be1883356a3.js.map → 13.2d06442dd9a05d9777b5.js.map} +1 -1
- udata/static/chunks/{17.3bd0340930d4a314ce9c.js → 17.e8e4caaad5cb0cc0bacc.js} +2 -2
- udata/static/chunks/{17.3bd0340930d4a314ce9c.js.map → 17.e8e4caaad5cb0cc0bacc.js.map} +1 -1
- udata/static/chunks/{19.8da42e8359d72afc2618.js → 19.f03a102365af4315f9db.js} +3 -3
- udata/static/chunks/{19.8da42e8359d72afc2618.js.map → 19.f03a102365af4315f9db.js.map} +1 -1
- udata/static/chunks/{8.54e44b102164ae5e7a67.js → 8.778091d55cd8ea39af6b.js} +2 -2
- udata/static/chunks/{8.54e44b102164ae5e7a67.js.map → 8.778091d55cd8ea39af6b.js.map} +1 -1
- udata/static/chunks/{9.07515e5187f475bce828.js → 9.033d7e190ca9e226a5d0.js} +3 -3
- udata/static/chunks/{9.07515e5187f475bce828.js.map → 9.033d7e190ca9e226a5d0.js.map} +1 -1
- udata/static/common.js +1 -1
- udata/static/common.js.map +1 -1
- udata/tests/api/test_dataservices_api.py +15 -0
- {udata-10.1.3.dev34251.dist-info → udata-10.1.3.dev34283.dist-info}/METADATA +3 -1
- {udata-10.1.3.dev34251.dist-info → udata-10.1.3.dev34283.dist-info}/RECORD +36 -34
- {udata-10.1.3.dev34251.dist-info → udata-10.1.3.dev34283.dist-info}/LICENSE +0 -0
- {udata-10.1.3.dev34251.dist-info → udata-10.1.3.dev34283.dist-info}/WHEEL +0 -0
- {udata-10.1.3.dev34251.dist-info → udata-10.1.3.dev34283.dist-info}/entry_points.txt +0 -0
- {udata-10.1.3.dev34251.dist-info → udata-10.1.3.dev34283.dist-info}/top_level.txt +0 -0
udata/commands/fixtures.py
CHANGED
@@ -39,7 +39,7 @@ COMMUNITY_RES_URL = "/api/1/datasets/community_resources"
 DISCUSSION_URL = "/api/1/discussions"


-DEFAULT_FIXTURE_FILE_TAG: str = "
+DEFAULT_FIXTURE_FILE_TAG: str = "v6.0.0"
 DEFAULT_FIXTURE_FILE: str = f"https://raw.githubusercontent.com/opendatateam/udata-fixtures/{DEFAULT_FIXTURE_FILE_TAG}/results.json"  # noqa

 DEFAULT_FIXTURES_RESULTS_FILENAME: str = "results.json"
udata/core/dataservices/constants.py
ADDED
@@ -0,0 +1,11 @@
+DATASERVICE_FORMATS = ["REST", "WMS", "WSL"]
+
+
+DATASERVICE_ACCESS_TYPE_OPEN = "open"
+DATASERVICE_ACCESS_TYPE_OPEN_WITH_ACCOUNT = "open_with_account"
+DATASERVICE_ACCESS_TYPE_RESTRICTED = "restricted"
+DATASERVICE_ACCESS_TYPES = [
+    DATASERVICE_ACCESS_TYPE_OPEN,
+    DATASERVICE_ACCESS_TYPE_OPEN_WITH_ACCOUNT,
+    DATASERVICE_ACCESS_TYPE_RESTRICTED,
+]
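
These constants replace the boolean pair `is_restricted`/`has_token` with a single `access_type` value. A minimal standalone sketch of the mapping the migration further down applies (the helper name is ours, not udata's):

def access_type_from_legacy(is_restricted: bool, has_token: bool) -> str:
    """Mirror the migration rules: restricted wins regardless of token,
    and an open service that still needs an account becomes open_with_account."""
    if is_restricted:
        return "restricted"  # DATASERVICE_ACCESS_TYPE_RESTRICTED
    if has_token:
        return "open_with_account"  # DATASERVICE_ACCESS_TYPE_OPEN_WITH_ACCOUNT
    return "open"  # DATASERVICE_ACCESS_TYPE_OPEN

assert access_type_from_legacy(True, False) == "restricted"
assert access_type_from_legacy(False, True) == "open_with_account"
assert access_type_from_legacy(False, False) == "open"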
udata/core/dataservices/csv.py
CHANGED
@@ -13,13 +13,13 @@ class DataserviceCsvAdapter(csv.Adapter):
         ("url", lambda d: d.self_web_url()),
         "description",
         "base_api_url",
-        "endpoint_description_url",
+        "machine_documentation_url",
+        "technical_documentation_url",
         "business_documentation_url",
         "authorization_request_url",
         "availability",
         "rate_limiting",
-        "is_restricted",
-        "has_token",
+        "access_type",
         "license",
         ("organization", "organization.name"),
         ("organization_id", "organization.id"),
udata/core/dataservices/models.py
CHANGED
@@ -8,6 +8,7 @@ from mongoengine.signals import post_save
 import udata.core.contact_point.api_fields as contact_api_fields
 import udata.core.dataset.api_fields as datasets_api_fields
 from udata.api_fields import field, function_field, generate_fields
+from udata.core.dataservices.constants import DATASERVICE_ACCESS_TYPES, DATASERVICE_FORMATS
 from udata.core.dataset.models import Dataset
 from udata.core.metrics.models import WithMetrics
 from udata.core.owned import Owned, OwnedQuerySet
@@ -24,8 +25,6 @@ from udata.uris import endpoint_for
 # "spatial"
 # "temporal_coverage"

-DATASERVICE_FORMATS = ["REST", "WMS", "WSL"]
-

 class DataserviceQuerySet(OwnedQuerySet):
     def visible(self):
@@ -95,6 +94,7 @@ class HarvestMetadata(db.EmbeddedDocument):
     )
     last_update = field(db.DateTimeField(), description="Date of the last harvesting")
     archived_at = field(db.DateTimeField())
+    archived_reason = field(db.StringField())


 @generate_fields(
@@ -138,13 +138,22 @@ class Dataservice(WithMetrics, Owned, db.Document):
     )
     description = field(db.StringField(default=""), description="In markdown")
     base_api_url = field(db.URLField(), sortable=True)
-    endpoint_description_url = field(db.URLField())
+
+    machine_documentation_url = field(
+        db.URLField(), description="Swagger link, OpenAPI format, WMS XML…"
+    )
+    technical_documentation_url = field(db.URLField(), description="HTML version of a Swagger…")
     business_documentation_url = field(db.URLField())
-    authorization_request_url = field(db.URLField())
-    availability = field(db.FloatField(min=0, max=100), example="99.99")
+
     rate_limiting = field(db.StringField())
-    is_restricted = field(db.BooleanField())
-    has_token = field(db.BooleanField())
+    rate_limiting_url = field(db.URLField())
+
+    availability = field(db.FloatField(min=0, max=100), example="99.99")
+    availability_url = field(db.URLField())
+
+    access_type = field(db.StringField(choices=DATASERVICE_ACCESS_TYPES), filterable={})
+    authorization_request_url = field(db.URLField())
+
     format = field(db.StringField(choices=DATASERVICE_FORMATS))

     license = field(
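
For orientation, the reworked `Dataservice` document now carries a machine/human documentation URL pair plus dedicated rate-limiting and availability link fields. A hypothetical payload using the new fields (all values invented):

payload = {
    "title": "Example API",                       # hypothetical dataservice
    "base_api_url": "https://api.example.org/v1/",
    "machine_documentation_url": "https://api.example.org/v1/openapi.json",
    "technical_documentation_url": "https://api.example.org/v1/docs/",
    "rate_limiting": "1000 requests/hour",
    "rate_limiting_url": "https://api.example.org/v1/limits/",
    "availability": 99.9,
    "availability_url": "https://status.example.org/",
    "access_type": "open_with_account",           # one of DATASERVICE_ACCESS_TYPES
    "format": "REST",
}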
udata/core/dataservices/rdf.py
CHANGED
@@ -42,7 +42,9 @@ def dataservice_from_rdf(
     dataservice.description = sanitize_html(d.value(DCT.description) or d.value(DCT.abstract))

     dataservice.base_api_url = url_from_rdf(d, DCAT.endpointURL)
-    dataservice.endpoint_description_url = url_from_rdf(d, DCAT.endpointDescription)
+
+    # TODO detect if it's human-readable or not?
+    dataservice.machine_documentation_url = url_from_rdf(d, DCAT.endpointDescription)

     roles = [  # Imbricated list of contact points for each role
         contact_points_from_rdf(d, rdf_entity, role, dataservice)
@@ -145,8 +147,8 @@ def dataservice_to_rdf(dataservice: Dataservice, graph=None):
         ),
     )

-    if dataservice.endpoint_description_url:
-        d.set(DCAT.endpointDescription, URIRef(dataservice.endpoint_description_url))
+    if dataservice.machine_documentation_url:
+        d.set(DCAT.endpointDescription, URIRef(dataservice.machine_documentation_url))

     # Add DCAT-AP HVD properties if the dataservice is tagged hvd.
     # See https://semiceu.github.io/DCAT-AP/releases/2.2.0-hvd/
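
On the wire the DCAT mapping is unchanged: dcat:endpointURL still feeds base_api_url, and dcat:endpointDescription now lands in machine_documentation_url instead of the removed endpoint_description_url. A self-contained rdflib (6.x) sketch of that read path, using our own triples rather than udata's helpers:

from rdflib import Graph, URIRef
from rdflib.namespace import DCAT, RDF

g = Graph()
service = URIRef("https://example.org/dataservices/1")  # hypothetical subject
g.add((service, RDF.type, DCAT.DataService))
g.add((service, DCAT.endpointURL, URIRef("https://api.example.org/v1/")))
g.add((service, DCAT.endpointDescription, URIRef("https://api.example.org/v1/openapi.json")))

base_api_url = g.value(service, DCAT.endpointURL)                       # -> base_api_url
machine_documentation_url = g.value(service, DCAT.endpointDescription)  # -> machine_documentation_url
print(base_api_url, machine_documentation_url)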
udata/core/dataservices/search.py
CHANGED
@@ -5,6 +5,11 @@ from flask_restx.inputs import boolean

 from udata.api import api
 from udata.api.parsers import ModelApiParser
+from udata.core.dataservices.constants import (
+    DATASERVICE_ACCESS_TYPE_OPEN,
+    DATASERVICE_ACCESS_TYPE_OPEN_WITH_ACCOUNT,
+    DATASERVICE_ACCESS_TYPE_RESTRICTED,
+)
 from udata.models import Dataservice, Organization, User
 from udata.search import (
     BoolFilter,
@@ -47,7 +52,11 @@ class DataserviceApiParser(ModelApiParser):
                 api.abort(400, "Organization arg must be an identifier")
             dataservices = dataservices.filter(organization=args["organization"])
         if "is_restricted" in args:
-            dataservices = dataservices.filter(is_restricted=boolean(args["is_restricted"]))
+            dataservices = dataservices.filter(
+                access_type__in=[DATASERVICE_ACCESS_TYPE_RESTRICTED]
+                if boolean(args["is_restricted"])
+                else [DATASERVICE_ACCESS_TYPE_OPEN, DATASERVICE_ACCESS_TYPE_OPEN_WITH_ACCOUNT]
+            )
         return dataservices
@@ -112,6 +121,6 @@ class DataserviceSearch(ModelSearchAdapter):
             "tags": dataservice.tags,
             "extras": extras,
             "followers": dataservice.metrics.get("followers", 0),
+            "is_restricted": dataservice.access_type == DATASERVICE_ACCESS_TYPE_RESTRICTED,
             "views": dataservice.metrics.get("views", 0),
-            "is_restricted": dataservice.is_restricted or False,
         }
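
The search adapter keeps a boolean `is_restricted` facet, but both the facet and the API filter are now derived from `access_type`. The translation, restated as a pure function (the name is ours):

def access_types_for(is_restricted: bool) -> list[str]:
    """Translate the legacy ?is_restricted= query argument into access_type values."""
    if is_restricted:
        return ["restricted"]
    # Open services, with or without an account requirement, count as not restricted.
    return ["open", "open_with_account"]

assert access_types_for(True) == ["restricted"]
assert access_types_for(False) == ["open", "open_with_account"]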
udata/harvest/actions.py
CHANGED
@@ -7,6 +7,7 @@ from bson import ObjectId
 from flask import current_app

 from udata.auth import current_user
+from udata.core.dataservices.models import Dataservice
 from udata.core.dataset.models import HarvestDatasetMetadata
 from udata.models import Dataset, Organization, PeriodicTask, User
 from udata.storage.s3 import delete_file
@@ -18,6 +19,7 @@ from .models import (
     VALIDATION_REFUSED,
     HarvestJob,
     HarvestSource,
+    archive_harvested_dataservice,
     archive_harvested_dataset,
 )
 from .tasks import harvest
@@ -161,6 +163,9 @@ def purge_sources():
         datasets = Dataset.objects.filter(harvest__source_id=str(source.id))
         for dataset in datasets:
             archive_harvested_dataset(dataset, reason="harvester-deleted", dryrun=False)
+        dataservices = Dataservice.objects.filter(harvest__source_id=str(source.id))
+        for dataservice in dataservices:
+            archive_harvested_dataservice(dataservice, reason="harvester-deleted", dryrun=False)
         source.delete()
     return count
udata/harvest/backends/base.py
CHANGED
@@ -20,6 +20,7 @@ from ..models import (
     HarvestItem,
     HarvestJob,
     HarvestLog,
+    archive_harvested_dataservice,
     archive_harvested_dataset,
 )
 from ..signals import after_harvest_job, before_harvest_job
@@ -342,6 +343,7 @@ class BaseBackend(object):
         harvest.last_update = datetime.utcnow()

         harvest.archived_at = None
+        harvest.archived_reason = None

         return harvest

@@ -370,9 +372,10 @@ class BaseBackend(object):
             "harvest__remote_id__nin": remote_ids,
             "harvest__last_update__lt": limit_date,
         }
-
+        local_datasets_not_on_remote = Dataset.objects.filter(**q)
+        local_dataservices_not_on_remote = Dataservice.objects.filter(**q)

-        for dataset in
+        for dataset in local_datasets_not_on_remote:
             if not dataset.harvest.archived_at:
                 archive_harvested_dataset(dataset, reason="not-on-remote", dryrun=self.dryrun)
             # add a HarvestItem to the job list (useful for report)
@@ -385,6 +388,23 @@ class BaseBackend(object):

         self.save_job()

+        for dataservice in local_dataservices_not_on_remote:
+            if not dataservice.harvest.archived_at:
+                archive_harvested_dataservice(
+                    dataservice, reason="not-on-remote", dryrun=self.dryrun
+                )
+            # add a HarvestItem to the job list (useful for report)
+            # even when archiving has already been done (useful for debug)
+            self.job.items.append(
+                HarvestItem(
+                    remote_id=str(dataservice.harvest.remote_id),
+                    dataservice=dataservice,
+                    status="archived",
+                )
+            )
+
+        self.save_job()
+
     def get_dataset(self, remote_id):
         """Get or create a dataset given its remote ID (and its source)
         We first try to match `source_id` to be source domain independent
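
Dataset and dataservice archiving now share the same "not on remote" selection; only the model class differs. A rough sketch of that shared-filter pattern with plain values (field names from the diff, everything else illustrative):

from datetime import datetime, timedelta

# Illustrative values only; udata derives `remote_ids` from the current job's
# items and the limit date from the source's autoarchive configuration.
limit_date = datetime.utcnow() - timedelta(days=7)
q = {
    "harvest__source_id": "some-source-id",  # hypothetical id
    "harvest__remote_id__nin": ["dataset-fake-0", "dataservice-fake-0"],
    "harvest__last_update__lt": limit_date,
}
# One query dict, two querysets:
#   Dataset.objects.filter(**q) and Dataservice.objects.filter(**q)
print(q)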
udata/harvest/models.py
CHANGED
@@ -6,6 +6,7 @@ from urllib.parse import urlparse
 from werkzeug.utils import cached_property

 from udata.core.dataservices.models import Dataservice
+from udata.core.dataservices.models import HarvestMetadata as HarvestDataserviceMetadata
 from udata.core.dataset.models import HarvestDatasetMetadata
 from udata.core.owned import Owned, OwnedQuerySet
 from udata.i18n import lazy_gettext as _
@@ -203,3 +204,21 @@ def archive_harvested_dataset(dataset, reason, dryrun=False):
         dataset.validate()
     else:
         dataset.save()
+
+
+def archive_harvested_dataservice(dataservice, reason, dryrun=False):
+    """
+    Archive an harvested dataservice, setting extras accordingly.
+    If `dryrun` is True, the dataservice is not saved but validated only.
+    """
+    log.debug("Archiving dataservice %s", dataservice.id)
+    archival_date = datetime.utcnow()
+    dataservice.archived_at = archival_date
+    if not dataservice.harvest:
+        dataservice.harvest = HarvestDataserviceMetadata()
+    dataservice.harvest.archived_reason = reason
+    dataservice.harvest.archived_at = archival_date
+    if dryrun:
+        dataservice.validate()
+    else:
+        dataservice.save()
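
A hedged usage sketch of the new helper (requires a udata application context and a configured MongoDB; the source id is invented):

from udata.harvest.models import archive_harvested_dataservice
from udata.models import Dataservice

for dataservice in Dataservice.objects.filter(harvest__source_id="some-source-id"):
    # With dryrun=True the document is only validated, never saved.
    archive_harvested_dataservice(dataservice, reason="harvester-deleted", dryrun=True)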
udata/harvest/tests/test_actions.py
CHANGED
@@ -6,6 +6,8 @@ from tempfile import NamedTemporaryFile
 import pytest
 from mock import patch

+from udata.core.dataservices.factories import DataserviceFactory
+from udata.core.dataservices.models import HarvestMetadata as HarvestDataserviceMetadata
 from udata.core.dataset.factories import DatasetFactory
 from udata.core.dataset.models import HarvestDatasetMetadata
 from udata.core.organization.factories import OrganizationFactory
@@ -396,17 +398,27 @@ class HarvestActionsTest:
         dataset_to_archive = DatasetFactory(
             harvest=HarvestDatasetMetadata(source_id=str(to_delete[0].id))
         )
+        dataservice_to_archive = DataserviceFactory(
+            harvest=HarvestDataserviceMetadata(source_id=str(to_delete[0].id))
+        )

         result = actions.purge_sources()
         dataset_to_archive.reload()
+        dataservice_to_archive.reload()

         assert result == len(to_delete)
         assert len(HarvestSource.objects) == len(to_keep)
         assert PeriodicTask.objects.filter(id=periodic_task.id).count() == 0
         assert HarvestJob.objects(id=harvest_job.id).count() == 0
+
         assert dataset_to_archive.harvest.archived == "harvester-deleted"
+        assert_equal_dates(dataset_to_archive.harvest.archived_at, now)
         assert_equal_dates(dataset_to_archive.archived, now)

+        assert dataservice_to_archive.harvest.archived_reason == "harvester-deleted"
+        assert_equal_dates(dataservice_to_archive.harvest.archived_at, now)
+        assert_equal_dates(dataservice_to_archive.archived_at, now)
+
     @pytest.mark.options(HARVEST_JOBS_RETENTION_DAYS=2)
     def test_purge_jobs(self):
         now = datetime.utcnow()
udata/harvest/tests/test_base_backend.py
CHANGED
@@ -4,6 +4,8 @@ from urllib.parse import urlparse
 import pytest
 from voluptuous import Schema

+from udata.core.dataservices.factories import DataserviceFactory
+from udata.core.dataservices.models import Dataservice
 from udata.core.dataset import tasks
 from udata.core.dataset.factories import DatasetFactory
 from udata.harvest.models import HarvestItem
@@ -20,9 +22,9 @@ class Unknown:
     pass


-def gen_remote_IDs(num: int) -> list[str]:
+def gen_remote_IDs(num: int, prefix: str = "") -> list[str]:
     """Generate remote IDs."""
-    return [f"fake-{i}" for i in range(num)]
+    return [f"{prefix}fake-{i}" for i in range(num)]


 class FakeBackend(BaseBackend):
@@ -45,6 +47,11 @@ class FakeBackend(BaseBackend):
         if self.is_done():
             return

+        for remote_id in self.source.config.get("dataservice_remote_ids", []):
+            self.process_dataservice(remote_id)
+            if self.is_done():
+                return
+
     def inner_process_dataset(self, item: HarvestItem):
         dataset = self.get_dataset(item.remote_id)

@@ -55,6 +62,16 @@ class FakeBackend(BaseBackend):
             dataset.last_modified_internal = self.source.config["last_modified"]
         return dataset

+    def inner_process_dataservice(self, item: HarvestItem):
+        dataservice = self.get_dataservice(item.remote_id)
+
+        for key, value in DataserviceFactory.as_dict().items():
+            if getattr(dataservice, key) is None:
+                setattr(dataservice, key, value)
+        if self.source.config.get("last_modified"):
+            dataservice.last_modified_internal = self.source.config["last_modified"]
+        return dataservice
+

 class HarvestFilterTest:
     @pytest.mark.parametrize("type,expected", HarvestFilter.TYPES.items())
@@ -210,7 +227,13 @@ class BaseBackendTest:

     def test_autoarchive(self, app):
         nb_datasets = 3
-        source = HarvestSourceFactory(config={"dataset_remote_ids": gen_remote_IDs(nb_datasets)})
+        nb_dataservices = 3
+        source = HarvestSourceFactory(
+            config={
+                "dataset_remote_ids": gen_remote_IDs(nb_datasets, "dataset-"),
+                "dataservice_remote_ids": gen_remote_IDs(nb_dataservices, "dataservice-"),
+            }
+        )
         backend = FakeBackend(source)

         # create a dangling dataset to be archived
@@ -220,7 +243,15 @@ class BaseBackendTest:
             harvest={
                 "domain": source.domain,
                 "source_id": str(source.id),
-                "remote_id": "not-on-remote",
+                "remote_id": "dataset-not-on-remote",
+                "last_update": last_update,
+            }
+        )
+        dataservice_arch = DataserviceFactory(
+            harvest={
+                "domain": source.domain,
+                "source_id": str(source.id),
+                "remote_id": "dataservice-not-on-remote",
                 "last_update": last_update,
             }
         )
@@ -232,7 +263,15 @@ class BaseBackendTest:
             harvest={
                 "domain": source.domain,
                 "source_id": str(source.id),
-                "remote_id": "not-on-remote-two",
+                "remote_id": "dataset-not-on-remote-two",
+                "last_update": last_update,
+            }
+        )
+        dataservice_no_arch = DataserviceFactory(
+            harvest={
+                "domain": source.domain,
+                "source_id": str(source.id),
+                "remote_id": "dataservice-not-on-remote-two",
                 "last_update": last_update,
             }
         )
@@ -240,13 +279,17 @@ class BaseBackendTest:
         job = backend.harvest()

         # all datasets except arch : 3 mocks + 1 manual (no_arch)
-        assert len(job.items) == nb_datasets + 1
+        assert len(job.items) == (nb_datasets + 1) + (nb_dataservices + 1)
         # all datasets : 3 mocks + 2 manuals (arch and no_arch)
         assert Dataset.objects.count() == nb_datasets + 2
+        assert Dataservice.objects.count() == nb_dataservices + 2

         archived_items = [i for i in job.items if i.status == "archived"]
-        assert len(archived_items) == 1
+        assert len(archived_items) == 2
         assert archived_items[0].dataset == dataset_arch
+        assert archived_items[0].dataservice is None
+        assert archived_items[1].dataset is None
+        assert archived_items[1].dataservice == dataservice_arch

         dataset_arch.reload()
         assert dataset_arch.archived is not None
@@ -258,18 +301,41 @@ class BaseBackendTest:
         assert "archived" not in dataset_no_arch.harvest
         assert "archived_at" not in dataset_no_arch.harvest

+        dataservice_arch.reload()
+        assert dataservice_arch.archived_at is not None
+        assert "archived_reason" in dataservice_arch.harvest
+        assert "archived_at" in dataservice_arch.harvest
+
+        dataservice_no_arch.reload()
+        assert dataservice_no_arch.archived_at is None
+        assert "archived_reason" not in dataservice_no_arch.harvest
+        assert "archived_at" not in dataservice_no_arch.harvest
+
         # test unarchive: archive manually then relaunch harvest
-        dataset = Dataset.objects.get(**{"harvest__remote_id": "fake-1"})
+        dataset = Dataset.objects.get(**{"harvest__remote_id": "dataset-fake-1"})
         dataset.archived = datetime.utcnow()
         dataset.harvest.archived = "not-on-remote"
         dataset.harvest.archived_at = datetime.utcnow()
         dataset.save()
+
+        dataservice = Dataservice.objects.get(**{"harvest__remote_id": "dataservice-fake-1"})
+        dataservice.archived_at = datetime.utcnow()
+        dataservice.harvest.archived_reason = "not-on-remote"
+        dataservice.harvest.archived_at = datetime.utcnow()
+        dataservice.save()
+
         backend.harvest()
+
         dataset.reload()
         assert dataset.archived is None
         assert "archived" not in dataset.harvest
         assert "archived_at" not in dataset.harvest

+        dataservice.reload()
+        assert dataservice.archived_at is None
+        assert "archived_reason" not in dataservice.harvest
+        assert "archived_at" not in dataservice.harvest
+
     def test_harvest_datasets_get_deleted(self):
         nb_datasets = 3
         source = HarvestSourceFactory(config={"dataset_remote_ids": gen_remote_IDs(nb_datasets)})
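
The `prefix` argument added to `gen_remote_IDs` keeps dataset and dataservice remote IDs disjoint within a single fake source. Its behaviour, restated in isolation:

def gen_remote_IDs(num: int, prefix: str = "") -> list[str]:
    """Generate remote IDs (same helper as in the test module above)."""
    return [f"{prefix}fake-{i}" for i in range(num)]

assert gen_remote_IDs(2, "dataset-") == ["dataset-fake-0", "dataset-fake-1"]
assert gen_remote_IDs(1) == ["fake-0"]  # the empty default prefix keeps the old behaviour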
udata/harvest/tests/test_dcat_backend.py
CHANGED
@@ -179,7 +179,7 @@ class DcatBackendTest:
         assert dataservices[0].title == "Explore API v2"
         assert dataservices[0].base_api_url == "https://data.paris2024.org/api/explore/v2.1/"
         assert (
-            dataservices[0].endpoint_description_url
+            dataservices[0].machine_documentation_url
             == "https://data.paris2024.org/api/explore/v2.1/swagger.json"
         )
         assert (
udata/migrations/2025-01-05-dataservices-fields-changes.py
ADDED
@@ -0,0 +1,136 @@
+"""
+This migration converts the legacy `is_restricted`/`has_token` pair into the new `access_type` field and moves `endpoint_description_url` to `machine_documentation_url` or `technical_documentation_url`.
+"""
+
+import logging
+from typing import List
+
+from mongoengine.connection import get_db
+
+from udata.core.dataservices.constants import (
+    DATASERVICE_ACCESS_TYPE_OPEN,
+    DATASERVICE_ACCESS_TYPE_OPEN_WITH_ACCOUNT,
+    DATASERVICE_ACCESS_TYPE_RESTRICTED,
+)
+from udata.core.dataservices.models import Dataservice
+
+log = logging.getLogger(__name__)
+
+
+def migrate(db):
+    log.info("Preprocessing dataservices…")
+
+    count = get_db().dataservice.update_many(
+        filter={
+            "$or": [
+                {"is_restricted": None},
+                {"is_restricted": {"$exists": False}},
+            ]
+        },
+        update={"$set": {"is_restricted": False}},
+    )
+    log.info(
+        f"\tConverted {count.modified_count} dataservices from `is_restricted=None` to `is_restricted=False`"
+    )
+
+    count = get_db().dataservice.update_many(
+        filter={
+            "$or": [
+                {"has_token": None},
+                {"has_token": {"$exists": False}},
+            ]
+        },
+        update={"$set": {"has_token": False}},
+    )
+    log.info(
+        f"\tConverted {count.modified_count} dataservices from `has_token=None` to `has_token=False`"
+    )
+
+    for dataservice in get_db().dataservice.find({"is_restricted": True, "has_token": False}):
+        log.info(
+            f"\tDataservice #{dataservice['_id']} {dataservice['title']} is restricted but without token. (will be set to access_type={DATASERVICE_ACCESS_TYPE_RESTRICTED})"
+        )
+
+    log.info("Processing dataservices…")
+
+    count = get_db().dataservice.update_many(
+        filter={
+            "is_restricted": True,
+            # `has_token` could be True or False, we don't care
+        },
+        update={"$set": {"access_type": DATASERVICE_ACCESS_TYPE_RESTRICTED}},
+    )
+    log.info(
+        f"\t{count.modified_count} restricted dataservices to DATASERVICE_ACCESS_TYPE_RESTRICTED"
+    )
+
+    count = get_db().dataservice.update_many(
+        filter={
+            "is_restricted": False,
+            "has_token": True,
+        },
+        update={"$set": {"access_type": DATASERVICE_ACCESS_TYPE_OPEN_WITH_ACCOUNT}},
+    )
+    log.info(
+        f"\t{count.modified_count} dataservices not restricted but with token to DATASERVICE_ACCESS_TYPE_OPEN_WITH_ACCOUNT"
+    )
+
+    count = get_db().dataservice.update_many(
+        filter={
+            "is_restricted": False,
+            "has_token": False,
+        },
+        update={"$set": {"access_type": DATASERVICE_ACCESS_TYPE_OPEN}},
+    )
+    log.info(f"\t{count.modified_count} open dataservices to DATASERVICE_ACCESS_TYPE_OPEN")
+
+    dataservices: List[Dataservice] = get_db().dataservice.find()
+    for dataservice in dataservices:
+        if (
+            "endpoint_description_url" not in dataservice
+            or not dataservice["endpoint_description_url"]
+        ):
+            continue
+
+        to_set = {}
+        if (
+            dataservice["endpoint_description_url"].endswith(".json")
+            or dataservice["endpoint_description_url"].endswith(".yaml")
+            or dataservice["endpoint_description_url"].endswith(".yml")
+            or dataservice["endpoint_description_url"].endswith("?format=openapi-json")
+            or "getcapabilities" in dataservice["endpoint_description_url"].lower()
+            or "getresourcedescription" in dataservice["endpoint_description_url"].lower()
+            or dataservice["endpoint_description_url"].startswith(
+                "https://api.insee.fr/catalogue/api-docs/carbon.super"
+            )
+        ):
+            # log.info(f"[MACHINE] {dataservice["endpoint_description_url"]}")
+            to_set["machine_documentation_url"] = dataservice["endpoint_description_url"]
+        else:
+            # log.info(f"[ HUMAN ] {dataservice["endpoint_description_url"]}")
+            to_set["technical_documentation_url"] = dataservice["endpoint_description_url"]
+
+        result = get_db().dataservice.update_one(
+            filter={
+                "_id": dataservice["_id"],
+            },
+            update={"$set": to_set},
+        )
+        assert result.modified_count == 1
+        assert result.matched_count == 1
+
+    log.info("Postprocessing dataservices…")
+
+    count = get_db().dataservice.update_many(
+        {},
+        {
+            "$unset": {
+                "endpoint_description_url": "",
+                "is_restricted": "",
+                "has_token": "",
+            }
+        },
+    )
+    log.info(f"\tUnset legacy fields on {count.modified_count} dataservices")
+
+    log.info("Done")