udata 10.1.3.dev34251__py2.py3-none-any.whl → 10.1.3.dev34283__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36)
  1. udata/commands/fixtures.py +1 -1
  2. udata/core/dataservices/constants.py +11 -0
  3. udata/core/dataservices/csv.py +3 -3
  4. udata/core/dataservices/models.py +16 -7
  5. udata/core/dataservices/rdf.py +5 -3
  6. udata/core/dataservices/search.py +11 -2
  7. udata/harvest/actions.py +5 -0
  8. udata/harvest/backends/base.py +22 -2
  9. udata/harvest/models.py +19 -0
  10. udata/harvest/tests/test_actions.py +12 -0
  11. udata/harvest/tests/test_base_backend.py +74 -8
  12. udata/harvest/tests/test_dcat_backend.py +1 -1
  13. udata/migrations/2025-01-05-dataservices-fields-changes.py +136 -0
  14. udata/static/chunks/{10.471164b2a9fe15614797.js → 10.8ca60413647062717b1e.js} +3 -3
  15. udata/static/chunks/{10.471164b2a9fe15614797.js.map → 10.8ca60413647062717b1e.js.map} +1 -1
  16. udata/static/chunks/{11.51d706fb9521c16976bc.js → 11.b6f741fcc366abfad9c4.js} +3 -3
  17. udata/static/chunks/{11.51d706fb9521c16976bc.js.map → 11.b6f741fcc366abfad9c4.js.map} +1 -1
  18. udata/static/chunks/{13.f29411b06be1883356a3.js → 13.2d06442dd9a05d9777b5.js} +2 -2
  19. udata/static/chunks/{13.f29411b06be1883356a3.js.map → 13.2d06442dd9a05d9777b5.js.map} +1 -1
  20. udata/static/chunks/{17.3bd0340930d4a314ce9c.js → 17.e8e4caaad5cb0cc0bacc.js} +2 -2
  21. udata/static/chunks/{17.3bd0340930d4a314ce9c.js.map → 17.e8e4caaad5cb0cc0bacc.js.map} +1 -1
  22. udata/static/chunks/{19.8da42e8359d72afc2618.js → 19.f03a102365af4315f9db.js} +3 -3
  23. udata/static/chunks/{19.8da42e8359d72afc2618.js.map → 19.f03a102365af4315f9db.js.map} +1 -1
  24. udata/static/chunks/{8.54e44b102164ae5e7a67.js → 8.778091d55cd8ea39af6b.js} +2 -2
  25. udata/static/chunks/{8.54e44b102164ae5e7a67.js.map → 8.778091d55cd8ea39af6b.js.map} +1 -1
  26. udata/static/chunks/{9.07515e5187f475bce828.js → 9.033d7e190ca9e226a5d0.js} +3 -3
  27. udata/static/chunks/{9.07515e5187f475bce828.js.map → 9.033d7e190ca9e226a5d0.js.map} +1 -1
  28. udata/static/common.js +1 -1
  29. udata/static/common.js.map +1 -1
  30. udata/tests/api/test_dataservices_api.py +15 -0
  31. {udata-10.1.3.dev34251.dist-info → udata-10.1.3.dev34283.dist-info}/METADATA +3 -1
  32. {udata-10.1.3.dev34251.dist-info → udata-10.1.3.dev34283.dist-info}/RECORD +36 -34
  33. {udata-10.1.3.dev34251.dist-info → udata-10.1.3.dev34283.dist-info}/LICENSE +0 -0
  34. {udata-10.1.3.dev34251.dist-info → udata-10.1.3.dev34283.dist-info}/WHEEL +0 -0
  35. {udata-10.1.3.dev34251.dist-info → udata-10.1.3.dev34283.dist-info}/entry_points.txt +0 -0
  36. {udata-10.1.3.dev34251.dist-info → udata-10.1.3.dev34283.dist-info}/top_level.txt +0 -0
udata/commands/fixtures.py CHANGED
@@ -39,7 +39,7 @@ COMMUNITY_RES_URL = "/api/1/datasets/community_resources"
 DISCUSSION_URL = "/api/1/discussions"
 
 
-DEFAULT_FIXTURE_FILE_TAG: str = "v5.0.0"
+DEFAULT_FIXTURE_FILE_TAG: str = "v6.0.0"
 DEFAULT_FIXTURE_FILE: str = f"https://raw.githubusercontent.com/opendatateam/udata-fixtures/{DEFAULT_FIXTURE_FILE_TAG}/results.json"  # noqa
 
 DEFAULT_FIXTURES_RESULTS_FILENAME: str = "results.json"

udata/core/dataservices/constants.py ADDED
@@ -0,0 +1,11 @@
+DATASERVICE_FORMATS = ["REST", "WMS", "WSL"]
+
+
+DATASERVICE_ACCESS_TYPE_OPEN = "open"
+DATASERVICE_ACCESS_TYPE_OPEN_WITH_ACCOUNT = "open_with_account"
+DATASERVICE_ACCESS_TYPE_RESTRICTED = "restricted"
+DATASERVICE_ACCESS_TYPES = [
+    DATASERVICE_ACCESS_TYPE_OPEN,
+    DATASERVICE_ACCESS_TYPE_OPEN_WITH_ACCOUNT,
+    DATASERVICE_ACCESS_TYPE_RESTRICTED,
+]

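The three `access_type` values replace the old `is_restricted`/`has_token` boolean pair on `Dataservice`. Going by the migration at the end of this diff, the mapping works out as follows; this helper is purely illustrative and is not part of the package:

    from udata.core.dataservices.constants import (
        DATASERVICE_ACCESS_TYPE_OPEN,
        DATASERVICE_ACCESS_TYPE_OPEN_WITH_ACCOUNT,
        DATASERVICE_ACCESS_TYPE_RESTRICTED,
    )

    def access_type_from_legacy(is_restricted: bool, has_token: bool) -> str:
        # Mirrors the update_many() calls in 2025-01-05-dataservices-fields-changes.py
        if is_restricted:
            # `has_token` is ignored for restricted services
            return DATASERVICE_ACCESS_TYPE_RESTRICTED
        if has_token:
            return DATASERVICE_ACCESS_TYPE_OPEN_WITH_ACCOUNT
        return DATASERVICE_ACCESS_TYPE_OPEN
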
udata/core/dataservices/csv.py CHANGED
@@ -13,13 +13,13 @@ class DataserviceCsvAdapter(csv.Adapter):
         ("url", lambda d: d.self_web_url()),
         "description",
         "base_api_url",
-        "endpoint_description_url",
+        "machine_documentation_url",
+        "technical_documentation_url",
         "business_documentation_url",
         "authorization_request_url",
         "availability",
         "rate_limiting",
-        "is_restricted",
-        "has_token",
+        "access_type",
         "license",
         ("organization", "organization.name"),
         ("organization_id", "organization.id"),

udata/core/dataservices/models.py CHANGED
@@ -8,6 +8,7 @@ from mongoengine.signals import post_save
 import udata.core.contact_point.api_fields as contact_api_fields
 import udata.core.dataset.api_fields as datasets_api_fields
 from udata.api_fields import field, function_field, generate_fields
+from udata.core.dataservices.constants import DATASERVICE_ACCESS_TYPES, DATASERVICE_FORMATS
 from udata.core.dataset.models import Dataset
 from udata.core.metrics.models import WithMetrics
 from udata.core.owned import Owned, OwnedQuerySet
@@ -24,8 +25,6 @@ from udata.uris import endpoint_for
 # "spatial"
 # "temporal_coverage"
 
-DATASERVICE_FORMATS = ["REST", "WMS", "WSL"]
-
 
 class DataserviceQuerySet(OwnedQuerySet):
     def visible(self):
@@ -95,6 +94,7 @@ class HarvestMetadata(db.EmbeddedDocument):
     )
     last_update = field(db.DateTimeField(), description="Date of the last harvesting")
     archived_at = field(db.DateTimeField())
+    archived_reason = field(db.StringField())
 
 
 @generate_fields(
@@ -138,13 +138,22 @@ class Dataservice(WithMetrics, Owned, db.Document):
     )
     description = field(db.StringField(default=""), description="In markdown")
     base_api_url = field(db.URLField(), sortable=True)
-    endpoint_description_url = field(db.URLField())
+
+    machine_documentation_url = field(
+        db.URLField(), description="Swagger link, OpenAPI format, WMS XML…"
+    )
+    technical_documentation_url = field(db.URLField(), description="HTML version of a Swagger…")
 business_documentation_url = field(db.URLField())
-    authorization_request_url = field(db.URLField())
-    availability = field(db.FloatField(min=0, max=100), example="99.99")
+
 rate_limiting = field(db.StringField())
-    is_restricted = field(db.BooleanField(), filterable={})
-    has_token = field(db.BooleanField())
+    rate_limiting_url = field(db.URLField())
+
+    availability = field(db.FloatField(min=0, max=100), example="99.99")
+    availability_url = field(db.URLField())
+
+    access_type = field(db.StringField(choices=DATASERVICE_ACCESS_TYPES), filterable={})
+    authorization_request_url = field(db.URLField())
+
 format = field(db.StringField(choices=DATASERVICE_FORMATS))
 
     license = field(

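`endpoint_description_url` is split in two: `machine_documentation_url` for machine-readable descriptions (OpenAPI/Swagger JSON, WMS XML…) and `technical_documentation_url` for human-readable documentation, with `rate_limiting_url` and `availability_url` added as companions to the existing fields. A minimal sketch of creating a dataservice with the new fields (all values below are made up for illustration):

    from udata.core.dataservices.constants import DATASERVICE_ACCESS_TYPE_OPEN_WITH_ACCOUNT
    from udata.core.dataservices.models import Dataservice

    dataservice = Dataservice(
        title="Example API",  # hypothetical values throughout
        base_api_url="https://api.example.org/v1/",
        machine_documentation_url="https://api.example.org/v1/swagger.json",
        technical_documentation_url="https://api.example.org/docs/",
        access_type=DATASERVICE_ACCESS_TYPE_OPEN_WITH_ACCOUNT,
    )
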
udata/core/dataservices/rdf.py CHANGED
@@ -42,7 +42,9 @@ def dataservice_from_rdf(
     dataservice.description = sanitize_html(d.value(DCT.description) or d.value(DCT.abstract))
 
     dataservice.base_api_url = url_from_rdf(d, DCAT.endpointURL)
-    dataservice.endpoint_description_url = url_from_rdf(d, DCAT.endpointDescription)
+
+    # TODO detect if it's human-readable or not?
+    dataservice.machine_documentation_url = url_from_rdf(d, DCAT.endpointDescription)
 
     roles = [  # Imbricated list of contact points for each role
         contact_points_from_rdf(d, rdf_entity, role, dataservice)
@@ -145,8 +147,8 @@ def dataservice_to_rdf(dataservice: Dataservice, graph=None):
         ),
     )
 
-    if dataservice.endpoint_description_url:
-        d.set(DCAT.endpointDescription, URIRef(dataservice.endpoint_description_url))
+    if dataservice.machine_documentation_url:
+        d.set(DCAT.endpointDescription, URIRef(dataservice.machine_documentation_url))
 
     # Add DCAT-AP HVD properties if the dataservice is tagged hvd.
     # See https://semiceu.github.io/DCAT-AP/releases/2.2.0-hvd/

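On the RDF side, `base_api_url` still maps to `dcat:endpointURL` and the new `machine_documentation_url` takes over `dcat:endpointDescription`. A minimal rdflib sketch of the same mapping, independent of udata's `url_from_rdf` helper (the node URI is hypothetical):

    from rdflib import Graph, Namespace, URIRef

    DCAT = Namespace("http://www.w3.org/ns/dcat#")

    g = Graph()
    service = URIRef("https://example.org/dataservice")  # hypothetical node
    g.add((service, DCAT.endpointURL, URIRef("https://api.example.org/v1/")))
    g.add((service, DCAT.endpointDescription, URIRef("https://api.example.org/v1/swagger.json")))

    # dataservice_from_rdf reads these into base_api_url and machine_documentation_url
    assert g.value(service, DCAT.endpointDescription).endswith("swagger.json")
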
udata/core/dataservices/search.py CHANGED
@@ -5,6 +5,11 @@ from flask_restx.inputs import boolean
 
 from udata.api import api
 from udata.api.parsers import ModelApiParser
+from udata.core.dataservices.constants import (
+    DATASERVICE_ACCESS_TYPE_OPEN,
+    DATASERVICE_ACCESS_TYPE_OPEN_WITH_ACCOUNT,
+    DATASERVICE_ACCESS_TYPE_RESTRICTED,
+)
 from udata.models import Dataservice, Organization, User
 from udata.search import (
     BoolFilter,
@@ -47,7 +52,11 @@ class DataserviceApiParser(ModelApiParser):
             api.abort(400, "Organization arg must be an identifier")
         dataservices = dataservices.filter(organization=args["organization"])
         if "is_restricted" in args:
-            dataservices = dataservices.filter(is_restricted=boolean(args["is_restricted"]))
+            dataservices = dataservices.filter(
+                access_type__in=[DATASERVICE_ACCESS_TYPE_RESTRICTED]
+                if boolean(args["is_restricted"])
+                else [DATASERVICE_ACCESS_TYPE_OPEN, DATASERVICE_ACCESS_TYPE_OPEN_WITH_ACCOUNT]
+            )
         return dataservices
 
 
@@ -112,6 +121,6 @@ class DataserviceSearch(ModelSearchAdapter):
             "tags": dataservice.tags,
             "extras": extras,
             "followers": dataservice.metrics.get("followers", 0),
+            "is_restricted": dataservice.access_type == DATASERVICE_ACCESS_TYPE_RESTRICTED,
             "views": dataservice.metrics.get("views", 0),
-            "is_restricted": dataservice.is_restricted or False,
         }

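The `is_restricted` query parameter stays for API compatibility but is now answered from `access_type`: `true` matches only restricted dataservices, `false` matches both open variants. The equivalent queryset calls, as a sketch:

    from udata.core.dataservices.constants import (
        DATASERVICE_ACCESS_TYPE_OPEN,
        DATASERVICE_ACCESS_TYPE_OPEN_WITH_ACCOUNT,
        DATASERVICE_ACCESS_TYPE_RESTRICTED,
    )
    from udata.core.dataservices.models import Dataservice

    # ?is_restricted=true
    Dataservice.objects.filter(access_type__in=[DATASERVICE_ACCESS_TYPE_RESTRICTED])

    # ?is_restricted=false
    Dataservice.objects.filter(
        access_type__in=[DATASERVICE_ACCESS_TYPE_OPEN, DATASERVICE_ACCESS_TYPE_OPEN_WITH_ACCOUNT]
    )
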
udata/harvest/actions.py CHANGED
@@ -7,6 +7,7 @@ from bson import ObjectId
 from flask import current_app
 
 from udata.auth import current_user
+from udata.core.dataservices.models import Dataservice
 from udata.core.dataset.models import HarvestDatasetMetadata
 from udata.models import Dataset, Organization, PeriodicTask, User
 from udata.storage.s3 import delete_file
@@ -18,6 +19,7 @@ from .models import (
     VALIDATION_REFUSED,
     HarvestJob,
     HarvestSource,
+    archive_harvested_dataservice,
     archive_harvested_dataset,
 )
 from .tasks import harvest
@@ -161,6 +163,9 @@ def purge_sources():
         datasets = Dataset.objects.filter(harvest__source_id=str(source.id))
         for dataset in datasets:
             archive_harvested_dataset(dataset, reason="harvester-deleted", dryrun=False)
+        dataservices = Dataservice.objects.filter(harvest__source_id=str(source.id))
+        for dataservice in dataservices:
+            archive_harvested_dataservice(dataservice, reason="harvester-deleted", dryrun=False)
         source.delete()
     return count
 

udata/harvest/backends/base.py CHANGED
@@ -20,6 +20,7 @@ from ..models import (
     HarvestItem,
     HarvestJob,
     HarvestLog,
+    archive_harvested_dataservice,
     archive_harvested_dataset,
 )
 from ..signals import after_harvest_job, before_harvest_job
@@ -342,6 +343,7 @@ class BaseBackend(object):
         harvest.last_update = datetime.utcnow()
 
         harvest.archived_at = None
+        harvest.archived_reason = None
 
         return harvest
 
@@ -370,9 +372,10 @@ class BaseBackend(object):
             "harvest__remote_id__nin": remote_ids,
             "harvest__last_update__lt": limit_date,
         }
-        local_items_not_on_remote = Dataset.objects.filter(**q)
+        local_datasets_not_on_remote = Dataset.objects.filter(**q)
+        local_dataservices_not_on_remote = Dataservice.objects.filter(**q)
 
-        for dataset in local_items_not_on_remote:
+        for dataset in local_datasets_not_on_remote:
             if not dataset.harvest.archived_at:
                 archive_harvested_dataset(dataset, reason="not-on-remote", dryrun=self.dryrun)
             # add a HarvestItem to the job list (useful for report)
@@ -385,6 +388,23 @@ class BaseBackend(object):
 
         self.save_job()
 
+        for dataservice in local_dataservices_not_on_remote:
+            if not dataservice.harvest.archived_at:
+                archive_harvested_dataservice(
+                    dataservice, reason="not-on-remote", dryrun=self.dryrun
+                )
+            # add a HarvestItem to the job list (useful for report)
+            # even when archiving has already been done (useful for debug)
+            self.job.items.append(
+                HarvestItem(
+                    remote_id=str(dataservice.harvest.remote_id),
+                    dataservice=dataservice,
+                    status="archived",
+                )
+            )
+
+        self.save_job()
+
     def get_dataset(self, remote_id):
         """Get or create a dataset given its remote ID (and its source)
         We first try to match `source_id` to be source domain independent

udata/harvest/models.py CHANGED
@@ -6,6 +6,7 @@ from urllib.parse import urlparse
 from werkzeug.utils import cached_property
 
 from udata.core.dataservices.models import Dataservice
+from udata.core.dataservices.models import HarvestMetadata as HarvestDataserviceMetadata
 from udata.core.dataset.models import HarvestDatasetMetadata
 from udata.core.owned import Owned, OwnedQuerySet
 from udata.i18n import lazy_gettext as _
@@ -203,3 +204,21 @@ def archive_harvested_dataset(dataset, reason, dryrun=False):
         dataset.validate()
     else:
         dataset.save()
+
+
+def archive_harvested_dataservice(dataservice, reason, dryrun=False):
+    """
+    Archive an harvested dataservice, setting extras accordingly.
+    If `dryrun` is True, the dataservice is not saved but validated only.
+    """
+    log.debug("Archiving dataservice %s", dataservice.id)
+    archival_date = datetime.utcnow()
+    dataservice.archived_at = archival_date
+    if not dataservice.harvest:
+        dataservice.harvest = HarvestDataserviceMetadata()
+    dataservice.harvest.archived_reason = reason
+    dataservice.harvest.archived_at = archival_date
+    if dryrun:
+        dataservice.validate()
+    else:
+        dataservice.save()

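`archive_harvested_dataservice` mirrors the dataset variant: it stamps `archived_at` on the dataservice itself and records `archived_reason`/`archived_at` on its harvest metadata, creating the metadata document if needed. Usage as seen elsewhere in this diff ("not-on-remote" during auto-archival, "harvester-deleted" when purging sources):

    from udata.harvest.models import archive_harvested_dataservice

    # dry run: validate only, nothing is saved
    archive_harvested_dataservice(dataservice, reason="not-on-remote", dryrun=True)

    # real archival, e.g. when the harvest source is purged
    archive_harvested_dataservice(dataservice, reason="harvester-deleted", dryrun=False)
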
udata/harvest/tests/test_actions.py CHANGED
@@ -6,6 +6,8 @@ from tempfile import NamedTemporaryFile
 import pytest
 from mock import patch
 
+from udata.core.dataservices.factories import DataserviceFactory
+from udata.core.dataservices.models import HarvestMetadata as HarvestDataserviceMetadata
 from udata.core.dataset.factories import DatasetFactory
 from udata.core.dataset.models import HarvestDatasetMetadata
 from udata.core.organization.factories import OrganizationFactory
@@ -396,17 +398,27 @@ class HarvestActionsTest:
         dataset_to_archive = DatasetFactory(
             harvest=HarvestDatasetMetadata(source_id=str(to_delete[0].id))
         )
+        dataservice_to_archive = DataserviceFactory(
+            harvest=HarvestDataserviceMetadata(source_id=str(to_delete[0].id))
+        )
 
         result = actions.purge_sources()
         dataset_to_archive.reload()
+        dataservice_to_archive.reload()
 
         assert result == len(to_delete)
         assert len(HarvestSource.objects) == len(to_keep)
         assert PeriodicTask.objects.filter(id=periodic_task.id).count() == 0
         assert HarvestJob.objects(id=harvest_job.id).count() == 0
+
         assert dataset_to_archive.harvest.archived == "harvester-deleted"
+        assert_equal_dates(dataset_to_archive.harvest.archived_at, now)
         assert_equal_dates(dataset_to_archive.archived, now)
 
+        assert dataservice_to_archive.harvest.archived_reason == "harvester-deleted"
+        assert_equal_dates(dataservice_to_archive.harvest.archived_at, now)
+        assert_equal_dates(dataservice_to_archive.archived_at, now)
+
     @pytest.mark.options(HARVEST_JOBS_RETENTION_DAYS=2)
     def test_purge_jobs(self):
         now = datetime.utcnow()

udata/harvest/tests/test_base_backend.py CHANGED
@@ -4,6 +4,8 @@ from urllib.parse import urlparse
 import pytest
 from voluptuous import Schema
 
+from udata.core.dataservices.factories import DataserviceFactory
+from udata.core.dataservices.models import Dataservice
 from udata.core.dataset import tasks
 from udata.core.dataset.factories import DatasetFactory
 from udata.harvest.models import HarvestItem
@@ -20,9 +22,9 @@ class Unknown:
     pass
 
 
-def gen_remote_IDs(num: int) -> list[str]:
+def gen_remote_IDs(num: int, prefix: str = "") -> list[str]:
     """Generate remote IDs."""
-    return [f"fake-{i}" for i in range(num)]
+    return [f"{prefix}fake-{i}" for i in range(num)]
 
 
 class FakeBackend(BaseBackend):
@@ -45,6 +47,11 @@ class FakeBackend(BaseBackend):
             if self.is_done():
                 return
 
+        for remote_id in self.source.config.get("dataservice_remote_ids", []):
+            self.process_dataservice(remote_id)
+            if self.is_done():
+                return
+
     def inner_process_dataset(self, item: HarvestItem):
         dataset = self.get_dataset(item.remote_id)
 
@@ -55,6 +62,16 @@ class FakeBackend(BaseBackend):
             dataset.last_modified_internal = self.source.config["last_modified"]
         return dataset
 
+    def inner_process_dataservice(self, item: HarvestItem):
+        dataservice = self.get_dataservice(item.remote_id)
+
+        for key, value in DataserviceFactory.as_dict().items():
+            if getattr(dataservice, key) is None:
+                setattr(dataservice, key, value)
+        if self.source.config.get("last_modified"):
+            dataservice.last_modified_internal = self.source.config["last_modified"]
+        return dataservice
+
 
 class HarvestFilterTest:
     @pytest.mark.parametrize("type,expected", HarvestFilter.TYPES.items())
@@ -210,7 +227,13 @@ class BaseBackendTest:
 
     def test_autoarchive(self, app):
         nb_datasets = 3
-        source = HarvestSourceFactory(config={"dataset_remote_ids": gen_remote_IDs(nb_datasets)})
+        nb_dataservices = 3
+        source = HarvestSourceFactory(
+            config={
+                "dataset_remote_ids": gen_remote_IDs(nb_datasets, "dataset-"),
+                "dataservice_remote_ids": gen_remote_IDs(nb_dataservices, "dataservice-"),
+            }
+        )
         backend = FakeBackend(source)
 
         # create a dangling dataset to be archived
@@ -220,7 +243,15 @@ class BaseBackendTest:
             harvest={
                 "domain": source.domain,
                 "source_id": str(source.id),
-                "remote_id": "not-on-remote",
+                "remote_id": "dataset-not-on-remote",
+                "last_update": last_update,
+            }
+        )
+        dataservice_arch = DataserviceFactory(
+            harvest={
+                "domain": source.domain,
+                "source_id": str(source.id),
+                "remote_id": "dataservice-not-on-remote",
                 "last_update": last_update,
             }
         )
@@ -232,7 +263,15 @@ class BaseBackendTest:
             harvest={
                 "domain": source.domain,
                 "source_id": str(source.id),
-                "remote_id": "not-on-remote-two",
+                "remote_id": "dataset-not-on-remote-two",
+                "last_update": last_update,
+            }
+        )
+        dataservice_no_arch = DataserviceFactory(
+            harvest={
+                "domain": source.domain,
+                "source_id": str(source.id),
+                "remote_id": "dataservice-not-on-remote-two",
                 "last_update": last_update,
             }
         )
@@ -240,13 +279,17 @@ class BaseBackendTest:
         job = backend.harvest()
 
         # all datasets except arch : 3 mocks + 1 manual (no_arch)
-        assert len(job.items) == nb_datasets + 1
+        assert len(job.items) == (nb_datasets + 1) + (nb_dataservices + 1)
         # all datasets : 3 mocks + 2 manuals (arch and no_arch)
         assert Dataset.objects.count() == nb_datasets + 2
+        assert Dataservice.objects.count() == nb_dataservices + 2
 
         archived_items = [i for i in job.items if i.status == "archived"]
-        assert len(archived_items) == 1
+        assert len(archived_items) == 2
         assert archived_items[0].dataset == dataset_arch
+        assert archived_items[0].dataservice is None
+        assert archived_items[1].dataset is None
+        assert archived_items[1].dataservice == dataservice_arch
 
         dataset_arch.reload()
         assert dataset_arch.archived is not None
@@ -258,18 +301,41 @@ class BaseBackendTest:
         assert "archived" not in dataset_no_arch.harvest
         assert "archived_at" not in dataset_no_arch.harvest
 
+        dataservice_arch.reload()
+        assert dataservice_arch.archived_at is not None
+        assert "archived_reason" in dataservice_arch.harvest
+        assert "archived_at" in dataservice_arch.harvest
+
+        dataservice_no_arch.reload()
+        assert dataservice_no_arch.archived_at is None
+        assert "archived_reason" not in dataservice_no_arch.harvest
+        assert "archived_at" not in dataservice_no_arch.harvest
+
         # test unarchive: archive manually then relaunch harvest
-        dataset = Dataset.objects.get(**{"harvest__remote_id": "fake-1"})
+        dataset = Dataset.objects.get(**{"harvest__remote_id": "dataset-fake-1"})
         dataset.archived = datetime.utcnow()
         dataset.harvest.archived = "not-on-remote"
         dataset.harvest.archived_at = datetime.utcnow()
         dataset.save()
+
+        dataservice = Dataservice.objects.get(**{"harvest__remote_id": "dataservice-fake-1"})
+        dataservice.archived_at = datetime.utcnow()
+        dataservice.harvest.archived_reason = "not-on-remote"
+        dataservice.harvest.archived_at = datetime.utcnow()
+        dataservice.save()
+
        backend.harvest()
+
         dataset.reload()
         assert dataset.archived is None
         assert "archived" not in dataset.harvest
         assert "archived_at" not in dataset.harvest
 
+        dataservice.reload()
+        assert dataservice.archived_at is None
+        assert "archived_reason" not in dataservice.harvest
+        assert "archived_at" not in dataservice.harvest
+
     def test_harvest_datasets_get_deleted(self):
         nb_datasets = 3
         source = HarvestSourceFactory(config={"dataset_remote_ids": gen_remote_IDs(nb_datasets)})

udata/harvest/tests/test_dcat_backend.py CHANGED
@@ -179,7 +179,7 @@ class DcatBackendTest:
         assert dataservices[0].title == "Explore API v2"
         assert dataservices[0].base_api_url == "https://data.paris2024.org/api/explore/v2.1/"
         assert (
-            dataservices[0].endpoint_description_url
+            dataservices[0].machine_documentation_url
             == "https://data.paris2024.org/api/explore/v2.1/swagger.json"
         )
         assert (

udata/migrations/2025-01-05-dataservices-fields-changes.py ADDED
@@ -0,0 +1,136 @@
+"""
+This migration replaces `is_restricted`/`has_token` with `access_type` and splits `endpoint_description_url` into `machine_documentation_url`/`technical_documentation_url`.
+"""
+
+import logging
+from typing import List
+
+from mongoengine.connection import get_db
+
+from udata.core.dataservices.constants import (
+    DATASERVICE_ACCESS_TYPE_OPEN,
+    DATASERVICE_ACCESS_TYPE_OPEN_WITH_ACCOUNT,
+    DATASERVICE_ACCESS_TYPE_RESTRICTED,
+)
+from udata.core.dataservices.models import Dataservice
+
+log = logging.getLogger(__name__)
+
+
+def migrate(db):
+    log.info("Preprocessing dataservices…")
+
+    count = get_db().dataservice.update_many(
+        filter={
+            "$or": [
+                {"is_restricted": None},
+                {"is_restricted": {"$exists": False}},
+            ]
+        },
+        update={"$set": {"is_restricted": False}},
+    )
+    log.info(
+        f"\tConverted {count.modified_count} dataservices from `is_restricted=None` to `is_restricted=False`"
+    )
+
+    count = get_db().dataservice.update_many(
+        filter={
+            "$or": [
+                {"has_token": None},
+                {"has_token": {"$exists": False}},
+            ]
+        },
+        update={"$set": {"has_token": False}},
+    )
+    log.info(
+        f"\tConverted {count.modified_count} dataservices from `has_token=None` to `has_token=False`"
+    )
+
+    for dataservice in get_db().dataservice.find({"is_restricted": True, "has_token": False}):
+        log.info(
+            f"\tDataservice #{dataservice['_id']} {dataservice['title']} is restricted but without token. (will be set to access_type={DATASERVICE_ACCESS_TYPE_RESTRICTED})"
+        )
+
+    log.info("Processing dataservices…")
+
+    count = get_db().dataservice.update_many(
+        filter={
+            "is_restricted": True,
+            # `has_token` could be True or False, we don't care
+        },
+        update={"$set": {"access_type": DATASERVICE_ACCESS_TYPE_RESTRICTED}},
+    )
+    log.info(
+        f"\t{count.modified_count} restricted dataservices to DATASERVICE_ACCESS_TYPE_RESTRICTED"
+    )
+
+    count = get_db().dataservice.update_many(
+        filter={
+            "is_restricted": False,
+            "has_token": True,
+        },
+        update={"$set": {"access_type": DATASERVICE_ACCESS_TYPE_OPEN_WITH_ACCOUNT}},
+    )
+    log.info(
+        f"\t{count.modified_count} dataservices not restricted but with token to DATASERVICE_ACCESS_TYPE_OPEN_WITH_ACCOUNT"
+    )
+
+    count = get_db().dataservice.update_many(
+        filter={
+            "is_restricted": False,
+            "has_token": False,
+        },
+        update={"$set": {"access_type": DATASERVICE_ACCESS_TYPE_OPEN}},
+    )
+    log.info(f"\t{count.modified_count} open dataservices to DATASERVICE_ACCESS_TYPE_OPEN")
+
+    dataservices: List[Dataservice] = get_db().dataservice.find()
+    for dataservice in dataservices:
+        if (
+            "endpoint_description_url" not in dataservice
+            or not dataservice["endpoint_description_url"]
+        ):
+            continue
+
+        to_set = {}
+        if (
+            dataservice["endpoint_description_url"].endswith(".json")
+            or dataservice["endpoint_description_url"].endswith(".yaml")
+            or dataservice["endpoint_description_url"].endswith(".yml")
+            or dataservice["endpoint_description_url"].endswith("?format=openapi-json")
+            or "getcapabilities" in dataservice["endpoint_description_url"].lower()
+            or "getresourcedescription" in dataservice["endpoint_description_url"].lower()
+            or dataservice["endpoint_description_url"].startswith(
+                "https://api.insee.fr/catalogue/api-docs/carbon.super"
+            )
+        ):
+            # log.info(f"[MACHINE] {dataservice["endpoint_description_url"]}")
+            to_set["machine_documentation_url"] = dataservice["endpoint_description_url"]
+        else:
+            # log.info(f"[ HUMAN ] {dataservice["endpoint_description_url"]}")
+            to_set["technical_documentation_url"] = dataservice["endpoint_description_url"]
+
+        result = get_db().dataservice.update_one(
+            filter={
+                "_id": dataservice["_id"],
+            },
+            update={"$set": to_set},
+        )
+        assert result.modified_count == 1
+        assert result.matched_count == 1
+
+    log.info("Postprocessing dataservices…")
+
+    count = get_db().dataservice.update_many(
+        {},
+        {
+            "$unset": {
+                "endpoint_description_url": "",
+                "is_restricted": "",
+                "has_token": "",
+            }
+        },
+    )
+    log.info(f"\tUnset legacy fields on {count.modified_count} dataservices")
+
+    log.info("Done")
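
The machine-vs-human split of `endpoint_description_url` is heuristic: known machine-readable extensions and capability endpoints go to `machine_documentation_url`, everything else to `technical_documentation_url`. Restated as a standalone function for illustration (not part of the migration):

    def is_machine_readable(url: str) -> bool:
        # Same conditions as the migration's classification branch
        lowered = url.lower()
        return (
            url.endswith((".json", ".yaml", ".yml"))
            or url.endswith("?format=openapi-json")
            or "getcapabilities" in lowered
            or "getresourcedescription" in lowered
            or url.startswith("https://api.insee.fr/catalogue/api-docs/carbon.super")
        )

    assert is_machine_readable("https://data.paris2024.org/api/explore/v2.1/swagger.json")
    assert not is_machine_readable("https://api.example.org/docs/")  # hypothetical human docs URL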