udata 10.2.1.dev34693__py2.py3-none-any.whl → 10.2.1.dev34761__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of udata might be problematic.
- udata/core/dataset/models.py +49 -23
- udata/core/dataset/rdf.py +19 -4
- udata/harvest/backends/base.py +1 -1
- udata/harvest/backends/dcat.py +48 -6
- udata/harvest/tests/factories.py +1 -1
- udata/harvest/tests/test_base_backend.py +2 -2
- udata/migrations/2025-03-20-save-quality-for-datasets.py +25 -0
- udata/static/chunks/{10.471164b2a9fe15614797.js → 10.8ca60413647062717b1e.js} +3 -3
- udata/static/chunks/{10.471164b2a9fe15614797.js.map → 10.8ca60413647062717b1e.js.map} +1 -1
- udata/static/chunks/{11.51d706fb9521c16976bc.js → 11.b6f741fcc366abfad9c4.js} +3 -3
- udata/static/chunks/{11.51d706fb9521c16976bc.js.map → 11.b6f741fcc366abfad9c4.js.map} +1 -1
- udata/static/chunks/{13.f29411b06be1883356a3.js → 13.2d06442dd9a05d9777b5.js} +2 -2
- udata/static/chunks/{13.f29411b06be1883356a3.js.map → 13.2d06442dd9a05d9777b5.js.map} +1 -1
- udata/static/chunks/{17.3bd0340930d4a314ce9c.js → 17.e8e4caaad5cb0cc0bacc.js} +2 -2
- udata/static/chunks/{17.3bd0340930d4a314ce9c.js.map → 17.e8e4caaad5cb0cc0bacc.js.map} +1 -1
- udata/static/chunks/{19.8da42e8359d72afc2618.js → 19.f03a102365af4315f9db.js} +3 -3
- udata/static/chunks/{19.8da42e8359d72afc2618.js.map → 19.f03a102365af4315f9db.js.map} +1 -1
- udata/static/chunks/{8.54e44b102164ae5e7a67.js → 8.778091d55cd8ea39af6b.js} +2 -2
- udata/static/chunks/{8.54e44b102164ae5e7a67.js.map → 8.778091d55cd8ea39af6b.js.map} +1 -1
- udata/static/chunks/{9.07515e5187f475bce828.js → 9.033d7e190ca9e226a5d0.js} +3 -3
- udata/static/chunks/{9.07515e5187f475bce828.js.map → 9.033d7e190ca9e226a5d0.js.map} +1 -1
- udata/static/common.js +1 -1
- udata/static/common.js.map +1 -1
- udata/tests/apiv2/test_datasets.py +7 -1
- udata/tests/dataset/test_dataset_model.py +0 -10
- udata/tests/dataset/test_dataset_rdf.py +18 -0
- {udata-10.2.1.dev34693.dist-info → udata-10.2.1.dev34761.dist-info}/METADATA +3 -1
- {udata-10.2.1.dev34693.dist-info → udata-10.2.1.dev34761.dist-info}/RECORD +32 -31
- {udata-10.2.1.dev34693.dist-info → udata-10.2.1.dev34761.dist-info}/LICENSE +0 -0
- {udata-10.2.1.dev34693.dist-info → udata-10.2.1.dev34761.dist-info}/WHEEL +0 -0
- {udata-10.2.1.dev34693.dist-info → udata-10.2.1.dev34761.dist-info}/entry_points.txt +0 -0
- {udata-10.2.1.dev34693.dist-info → udata-10.2.1.dev34761.dist-info}/top_level.txt +0 -0
udata/core/dataset/models.py
CHANGED
@@ -384,7 +384,7 @@ class ResourceMixin(object):
             return to_naive_datetime(self.harvest.modified_at)
         if self.filetype == "remote" and self.extras.get("analysis:last-modified-at"):
             return to_naive_datetime(self.extras.get("analysis:last-modified-at"))
-        return self.last_modified_internal
+        return to_naive_datetime(self.last_modified_internal)

     def clean(self):
         super(ResourceMixin, self).clean()
@@ -565,6 +565,8 @@ class Dataset(WithMetrics, DatasetBadgeMixin, Owned, db.Document):
     extras = db.ExtrasField()
     harvest = db.EmbeddedDocumentField(HarvestDatasetMetadata)

+    quality_cached = db.DictField()
+
     featured = db.BooleanField(required=True, default=False)

     contact_points = db.ListField(db.ReferenceField("ContactPoint", reverse_delete_rule=db.PULL))
@@ -672,6 +674,8 @@ class Dataset(WithMetrics, DatasetBadgeMixin, Owned, db.Document):
         if len(set(res.id for res in self.resources)) != len(self.resources):
             raise MongoEngineValidationError(f"Duplicate resource ID in dataset #{self.id}.")

+        self.quality_cached = self.compute_quality()
+
         for key, value in self.extras.items():
             if not key.startswith("custom:"):
                 continue
@@ -763,13 +767,9 @@ class Dataset(WithMetrics, DatasetBadgeMixin, Owned, db.Document):

     @property
     def last_modified(self):
-        if (
-            self.harvest
-            and self.harvest.modified_at
-            and to_naive_datetime(self.harvest.modified_at) < datetime.utcnow()
-        ):
+        if self.harvest and self.harvest.modified_at:
             return to_naive_datetime(self.harvest.modified_at)
-        return self.last_modified_internal
+        return to_naive_datetime(self.last_modified_internal)

     @property
     def last_update(self):
@@ -824,8 +824,34 @@ class Dataset(WithMetrics, DatasetBadgeMixin, Owned, db.Document):
         else:
             return self.last_update + delta

-    @
+    @property
     def quality(self):
+        # `quality_cached` should always be set, except during the migration
+        # creating this property. We could remove `or self.compute_quality()`
+        # after the migration but since we need to keep the computed property for
+        # `update_fulfilled_in_time`, maybe we leave it here? Just in case?
+        quality = self.quality_cached or self.compute_quality()
+
+        # :UpdateFulfilledInTime
+        # `next_update_for_update_fulfilled_in_time` is only useful to compute the
+        # real `update_fulfilled_in_time` check, so we pop it to not polute the `quality`
+        # object for users.
+        next_update = quality.pop("next_update_for_update_fulfilled_in_time", None)
+        if next_update:
+            # Allow for being one day late on update.
+            # We may have up to one day delay due to harvesting for example
+            quality["update_fulfilled_in_time"] = (next_update - datetime.utcnow()).days >= -1
+        elif self.frequency in ["continuous", "irregular", "punctual"]:
+            # For these frequencies, we don't expect regular updates or can't quantify them.
+            # Thus we consider the update_fulfilled_in_time quality criterion to be true.
+            quality["update_fulfilled_in_time"] = True
+
+        # Since `update_fulfilled_in_time` cannot be precomputed, `score` cannot either.
+        quality["score"] = self.compute_quality_score(quality)
+
+        return quality
+
+    def compute_quality(self):
         """Return a dict filled with metrics related to the inner

         quality of the dataset:
@@ -835,25 +861,18 @@ class Dataset(WithMetrics, DatasetBadgeMixin, Owned, db.Document):
         * and so on
         """
         result = {}
-        if not self.id:
-            # Quality is only relevant on saved Datasets
-            return result

         result["license"] = True if self.license else False
         result["temporal_coverage"] = True if self.temporal_coverage else False
         result["spatial"] = True if self.spatial else False

         result["update_frequency"] = self.frequency and self.frequency != "unknown"
-
-
-
-
-
-
-        elif self.frequency in ["continuous", "irregular", "punctual"]:
-            # For these frequencies, we don't expect regular updates or can't quantify them.
-            # Thus we consider the update_fulfilled_in_time quality criterion to be true.
-            result["update_fulfilled_in_time"] = True
+
+        # We only save the next_update here because it is based on resources
+        # We cannot save the `update_fulfilled_in_time` because it is time
+        # sensitive (so setting it on save is not really useful…)
+        # See :UpdateFulfilledInTime
+        result["next_update_for_update_fulfilled_in_time"] = self.next_update

         result["dataset_description_quality"] = (
             True
@@ -876,7 +895,6 @@ class Dataset(WithMetrics, DatasetBadgeMixin, Owned, db.Document):
             resource_desc = True
         result["resources_documentation"] = resource_doc or resource_desc

-        result["score"] = self.compute_quality_score(result)
         return result

     @property
@@ -934,8 +952,16 @@ class Dataset(WithMetrics, DatasetBadgeMixin, Owned, db.Document):
         if resource.id in [r.id for r in self.resources]:
             raise MongoEngineValidationError("Cannot add resource with already existing ID")

+        self.resources.insert(0, resource)
         self.update(
-            __raw__={
+            __raw__={
+                "$set": {
+                    "quality_cached": self.compute_quality(),
+                },
+                "$push": {
+                    "resources": {"$each": [resource.to_mongo()], "$position": 0},
+                },
+            }
         )
         self.reload()
         self.on_resource_added.send(self.__class__, document=self, resource_id=resource.id)
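The models.py changes split quality computation in two: the time-independent criteria are computed by compute_quality() and persisted in the new quality_cached field at validation time (and when a resource is added), while the time-sensitive update_fulfilled_in_time flag and the derived score are recomputed on every read of the quality property. Below is a minimal, self-contained sketch of that caching pattern; SketchDataset and its toy scoring are illustrative stand-ins, not udata's actual Dataset model or compute_quality_score logic.

from datetime import datetime, timedelta


class SketchDataset:
    """Illustrative stand-in for a document with a cached quality dict."""

    def __init__(self, frequency, next_update, has_license):
        self.frequency = frequency          # e.g. "daily", "unknown", "punctual"
        self.next_update = next_update      # datetime or None
        self.has_license = has_license
        self.quality_cached = {}            # would be a db.DictField in udata

    def compute_quality(self):
        # Only time-independent facts about the document itself, so the
        # result can safely be stored at save time.
        return {
            "license": self.has_license,
            "update_frequency": self.frequency not in (None, "unknown"),
            # Stored so the read-time check below can use it (the diff calls
            # this key next_update_for_update_fulfilled_in_time).
            "next_update_for_update_fulfilled_in_time": self.next_update,
        }

    def save(self):
        # Mirrors the validation hook in the diff: refresh the cache on save.
        self.quality_cached = self.compute_quality()

    @property
    def quality(self):
        # Copy so popping the helper key does not mutate the cache.
        quality = dict(self.quality_cached) or self.compute_quality()
        next_update = quality.pop("next_update_for_update_fulfilled_in_time", None)
        if next_update:
            # Allow up to one day of lateness, as in the diff.
            quality["update_fulfilled_in_time"] = (next_update - datetime.utcnow()).days >= -1
        elif self.frequency in ("continuous", "irregular", "punctual"):
            quality["update_fulfilled_in_time"] = True
        # Toy score: fraction of truthy criteria (udata uses compute_quality_score).
        quality["score"] = sum(bool(v) for v in quality.values()) / len(quality)
        return quality


dataset = SketchDataset("daily", datetime.utcnow() + timedelta(days=2), has_license=True)
dataset.save()
print(dataset.quality)
# {'license': True, 'update_frequency': True, 'update_fulfilled_in_time': True, 'score': 1.0}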
udata/core/dataset/rdf.py
CHANGED
@@ -5,7 +5,7 @@ This module centralize dataset helpers for RDF/DCAT serialization and parsing
 import calendar
 import json
 import logging
-from datetime import date
+from datetime import date, datetime
 from typing import Optional

 from dateutil.parser import parse as parse_dt
@@ -50,7 +50,7 @@ from udata.rdf import (
     url_from_rdf,
 )
 from udata.uris import endpoint_for
-from udata.utils import get_by, safe_unicode
+from udata.utils import get_by, safe_unicode, to_naive_datetime

 from .constants import OGC_SERVICE_FORMATS, UPDATE_FREQUENCIES
 from .models import Checksum, Dataset, License, Resource
@@ -735,7 +735,14 @@ def resource_from_rdf(graph_or_distrib, dataset=None, is_additionnal=False):
     if not resource.harvest:
         resource.harvest = HarvestResourceMetadata()
     resource.harvest.created_at = created_at
-    resource.harvest.modified_at = modified_at
+
+    # In the past, we've encountered future `modified_at` during harvesting
+    # do not save it. :FutureHarvestModifiedAt
+    if modified_at and to_naive_datetime(modified_at) > datetime.utcnow():
+        log.warning(f"Future `DCT.modified` date '{modified_at}' in resource")
+    else:
+        resource.harvest.modified_at = modified_at
+
     resource.harvest.dct_identifier = identifier
     resource.harvest.uri = uri

@@ -755,6 +762,8 @@ def dataset_from_rdf(graph: Graph, dataset=None, node=None, remote_url_prefix: s

     dataset.title = rdf_value(d, DCT.title)
     if not dataset.title:
+        # If the dataset is externaly defined (so without title and just with a link to the dataset XML)
+        # we should have skipped it way before in :ExcludeExternalyDefinedDataset
         raise HarvestSkipException("missing title on dataset")

     # Support dct:abstract if dct:description is missing (sometimes used instead)
@@ -834,7 +843,13 @@ def dataset_from_rdf(graph: Graph, dataset=None, node=None, remote_url_prefix: s
     dataset.harvest.uri = uri
     dataset.harvest.remote_url = remote_url
     dataset.harvest.created_at = created_at
-    dataset.harvest.modified_at = modified_at
+
+    # In the past, we've encountered future `modified_at` during harvesting
+    # do not save it. :FutureHarvestModifiedAt
+    if modified_at and to_naive_datetime(modified_at) > datetime.utcnow():
+        log.warning(f"Future `DCT.modified` date '{modified_at}' in dataset")
+    else:
+        dataset.harvest.modified_at = modified_at

     return dataset

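Both resource_from_rdf and dataset_from_rdf now refuse to store a DCT.modified value that lies in the future, logging a warning instead (the :FutureHarvestModifiedAt guard). A small standalone sketch of that guard follows; to_naive_datetime is reimplemented here only so the snippet runs on its own, whereas udata imports it from udata.utils.

import logging
from datetime import datetime, timezone
from typing import Optional

log = logging.getLogger(__name__)


def to_naive_datetime(value: datetime) -> datetime:
    # Simplified stand-in for udata.utils.to_naive_datetime: normalize to
    # UTC and drop tzinfo so the value can be compared with utcnow().
    if value.tzinfo is not None:
        value = value.astimezone(timezone.utc).replace(tzinfo=None)
    return value


def safe_modified_at(modified_at: Optional[datetime]) -> Optional[datetime]:
    # Mirrors the guard from the diff: keep the harvested modification date
    # only if it is not in the future.
    if modified_at and to_naive_datetime(modified_at) > datetime.utcnow():
        log.warning(f"Future `DCT.modified` date '{modified_at}' ignored")
        return None
    return modified_at


print(safe_modified_at(datetime(2999, 1, 1)))  # None (and a warning is logged)
print(safe_modified_at(datetime(2020, 1, 1)))  # 2020-01-01 00:00:00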
udata/harvest/backends/base.py
CHANGED
@@ -256,7 +256,7 @@ class BaseBackend(object):
         ]
         self.save_job()

-    def
+    def has_reached_max_items(self) -> bool:
         """Should be called after process_dataset to know if we reach the max items"""
         return self.max_items and len(self.job.items) >= self.max_items

udata/harvest/backends/dcat.py
CHANGED
@@ -9,7 +9,7 @@ from rdflib.namespace import RDF

 from udata.core.dataservices.rdf import dataservice_from_rdf
 from udata.core.dataset.rdf import dataset_from_rdf
-from udata.harvest.models import HarvestItem
+from udata.harvest.models import HarvestError, HarvestItem
 from udata.i18n import gettext as _
 from udata.rdf import (
     DCAT,
@@ -18,6 +18,7 @@ from udata.rdf import (
     SPDX,
     guess_format,
     namespace_manager,
+    rdf_value,
     url_from_rdf,
 )
 from udata.storage.s3 import store_as_json
@@ -77,9 +78,19 @@ class DcatBackend(BaseBackend):
             self.process_one_datasets_page(page_number, page)
             serialized_graphs.append(page.serialize(format=fmt, indent=None))

+        # We do a second pass to have all datasets in memory and attach datasets
+        # to dataservices. It could be better to be one pass of graph walking and
+        # then one pass of attaching datasets to dataservices.
         for page_number, page in self.walk_graph(self.source.url, fmt):
             self.process_one_dataservices_page(page_number, page)

+        if not self.dryrun and self.has_reached_max_items():
+            # We have reached the max_items limit. Warn the user that all the datasets may not be present.
+            error = HarvestError(
+                message=f"{self.max_items} max items reached, not all datasets/dataservices were retrieved"
+            )
+            self.job.errors.append(error)
+
         # The official MongoDB document size in 16MB. The default value here is 15MB to account for other fields in the document (and for difference between * 1024 vs * 1000).
         max_harvest_graph_size_in_mongo = current_app.config.get(
             "HARVEST_MAX_CATALOG_SIZE_IN_MONGO"
@@ -146,7 +157,7 @@ class DcatBackend(BaseBackend):
                 break

             yield page_number, subgraph
-            if self.
+            if self.has_reached_max_items():
                 return

             page_number += 1
@@ -154,17 +165,48 @@ class DcatBackend(BaseBackend):
     def process_one_datasets_page(self, page_number: int, page: Graph):
         for node in page.subjects(RDF.type, DCAT.Dataset):
             remote_id = page.value(node, DCT.identifier)
+            if self.is_dataset_external_to_this_page(page, node):
+                continue
+
             self.process_dataset(remote_id, page_number=page_number, page=page, node=node)

-            if self.
+            if self.has_reached_max_items():
                 return

+    def is_dataset_external_to_this_page(self, page: Graph, node) -> bool:
+        # In dataservice nodes we have `servesDataset` or `hasPart` that can contains nodes
+        # with type=dataset. We don't want to process them because these nodes are empty (they
+        # only contains a link to the dataset definition).
+        # These datasets are either present in the catalog in previous or next pages or
+        # external from the catalog we are currently harvesting (so we don't want to harvest them).
+        # First we thought of skipping them inside `dataset_from_rdf` (see :ExcludeExternalyDefinedDataset)
+        # but it creates a lot of "fake" items in the job and raising problems (reaching the max harvest item for
+        # example and not getting to the "real" datasets/dataservices in subsequent pages)
+        # So to prevent creating a lot of useless items in the job we first thought about checking to see if there is no title and
+        # if `isPrimaryTopicOf` is present. But it may be better to check if the only link of the node with the current page is a
+        # `servesDataset` or `hasPart`. If it's the case, the node is only present in a dataservice. (maybe we could also check that
+        # the `_other_node` is a dataservice?)
+        # `isPrimaryTopicOf` is the tag present in the first harvester raising the problem, it may exists other
+        # values of the same sort we need to check here.
+
+        # This is not dangerous because we check for missing title in `dataset_from_rdf` later so we would have skipped
+        # this dataset anyway.
+        resource = page.resource(node)
+        title = rdf_value(resource, DCT.title)
+        if title:
+            return False
+
+        predicates = [link_type for (_other_node, link_type) in page.subject_predicates(node)]
+        return len(predicates) == 1 and (
+            predicates[0] == DCAT.servesDataset or predicates[0] == DCT.hasPart
+        )
+
     def process_one_dataservices_page(self, page_number: int, page: Graph):
         for node in page.subjects(RDF.type, DCAT.DataService):
             remote_id = page.value(node, DCT.identifier)
             self.process_dataservice(remote_id, page_number=page_number, page=page, node=node)

-            if self.
+            if self.has_reached_max_items():
                 return

     def inner_process_dataset(self, item: HarvestItem, page_number: int, page: Graph, node):
@@ -266,7 +308,7 @@ class CswDcatBackend(DcatBackend):
                 subgraph.parse(data=ET.tostring(child), format=fmt)

                 yield page_number, subgraph
-                if self.
+                if self.has_reached_max_items():
                     return

             next_record = self.next_record_if_should_continue(start, search_results)
@@ -375,7 +417,7 @@ class CswIso19139DcatBackend(DcatBackend):
                 raise ValueError("Failed to fetch CSW content")

             yield page_number, subgraph
-            if self.
+            if self.has_reached_max_items():
                 return

             next_record = self.next_record_if_should_continue(start, search_results)
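The new is_dataset_external_to_this_page() check skips dcat:Dataset nodes that carry no dct:title and whose only link into the page is dcat:servesDataset or dct:hasPart, i.e. empty stubs referenced by a dataservice. The sketch below reproduces that predicate check with plain rdflib calls on a made-up graph; it uses Graph.value instead of udata's rdf_value helper, and the URIs are purely illustrative.

from rdflib import Graph, Literal, Namespace, URIRef
from rdflib.namespace import RDF

DCAT = Namespace("http://www.w3.org/ns/dcat#")
DCT = Namespace("http://purl.org/dc/terms/")


def is_external_dataset(page: Graph, node) -> bool:
    # A node with no title whose only incoming link is dcat:servesDataset or
    # dct:hasPart is just a reference held by a dataservice, not a dataset
    # actually defined in this page.
    if page.value(node, DCT.title):
        return False
    predicates = [p for (_subject, p) in page.subject_predicates(node)]
    return len(predicates) == 1 and predicates[0] in (DCAT.servesDataset, DCT.hasPart)


g = Graph()
service = URIRef("https://example.org/dataservice")
real_dataset = URIRef("https://example.org/dataset/1")
stub_dataset = URIRef("https://example.org/dataset/external")

g.add((real_dataset, RDF.type, DCAT.Dataset))
g.add((real_dataset, DCT.title, Literal("A dataset defined in this catalog")))
g.add((service, RDF.type, DCAT.DataService))
g.add((stub_dataset, RDF.type, DCAT.Dataset))       # empty stub: no title
g.add((service, DCAT.servesDataset, stub_dataset))  # its only incoming link

print(is_external_dataset(g, real_dataset))  # False
print(is_external_dataset(g, stub_dataset))  # True

The other notable change in this file records a HarvestError on the job when the max_items limit is reached, so users are told that not all datasets and dataservices were retrieved.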
udata/harvest/tests/factories.py
CHANGED
@@ -61,7 +61,7 @@ class FactoryBackend(backends.BaseBackend):
         mock_initialize.send(self)
         for i in range(self.config.get("count", DEFAULT_COUNT)):
             self.process_dataset(str(i))
-            if self.
+            if self.has_reached_max_items():
                 return

     def inner_process_dataset(self, item: HarvestItem):
udata/harvest/tests/test_base_backend.py
CHANGED
@@ -44,12 +44,12 @@ class FakeBackend(BaseBackend):
     def inner_harvest(self):
         for remote_id in self.source.config.get("dataset_remote_ids", []):
             self.process_dataset(remote_id)
-            if self.
+            if self.has_reached_max_items():
                 return

         for remote_id in self.source.config.get("dataservice_remote_ids", []):
             self.process_dataservice(remote_id)
-            if self.
+            if self.has_reached_max_items():
                 return

     def inner_process_dataset(self, item: HarvestItem):
udata/migrations/2025-03-20-save-quality-for-datasets.py
ADDED
@@ -0,0 +1,25 @@
+"""
+This migration keeps only the "Local authority" badge if the organization also has the "Public service" badge.
+"""
+
+import logging
+
+import click
+
+from udata.core.dataset.models import Dataset
+
+log = logging.getLogger(__name__)
+
+
+def migrate(db):
+    log.info("Saving all datasets")
+
+    count = Dataset.objects().count()
+    with click.progressbar(Dataset.objects(), length=count) as datasets:
+        for dataset in datasets:
+            try:
+                dataset.save()
+            except Exception as err:
+                log.error(f"Cannot save dataset {dataset.id} {err}")
+
+    log.info("Done")