udata 10.2.1.dev34728__py2.py3-none-any.whl → 10.2.1.dev34761__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of udata might be problematic. Click here for more details.

@@ -384,7 +384,7 @@ class ResourceMixin(object):
384
384
  return to_naive_datetime(self.harvest.modified_at)
385
385
  if self.filetype == "remote" and self.extras.get("analysis:last-modified-at"):
386
386
  return to_naive_datetime(self.extras.get("analysis:last-modified-at"))
387
- return self.last_modified_internal
387
+ return to_naive_datetime(self.last_modified_internal)
388
388
 
389
389
  def clean(self):
390
390
  super(ResourceMixin, self).clean()
@@ -565,6 +565,8 @@ class Dataset(WithMetrics, DatasetBadgeMixin, Owned, db.Document):
565
565
  extras = db.ExtrasField()
566
566
  harvest = db.EmbeddedDocumentField(HarvestDatasetMetadata)
567
567
 
568
+ quality_cached = db.DictField()
569
+
568
570
  featured = db.BooleanField(required=True, default=False)
569
571
 
570
572
  contact_points = db.ListField(db.ReferenceField("ContactPoint", reverse_delete_rule=db.PULL))
@@ -672,6 +674,8 @@ class Dataset(WithMetrics, DatasetBadgeMixin, Owned, db.Document):
672
674
  if len(set(res.id for res in self.resources)) != len(self.resources):
673
675
  raise MongoEngineValidationError(f"Duplicate resource ID in dataset #{self.id}.")
674
676
 
677
+ self.quality_cached = self.compute_quality()
678
+
675
679
  for key, value in self.extras.items():
676
680
  if not key.startswith("custom:"):
677
681
  continue
@@ -763,13 +767,9 @@ class Dataset(WithMetrics, DatasetBadgeMixin, Owned, db.Document):
763
767
 
764
768
  @property
765
769
  def last_modified(self):
766
- if (
767
- self.harvest
768
- and self.harvest.modified_at
769
- and to_naive_datetime(self.harvest.modified_at) < datetime.utcnow()
770
- ):
770
+ if self.harvest and self.harvest.modified_at:
771
771
  return to_naive_datetime(self.harvest.modified_at)
772
- return self.last_modified_internal
772
+ return to_naive_datetime(self.last_modified_internal)
773
773
 
774
774
  @property
775
775
  def last_update(self):
@@ -824,8 +824,34 @@ class Dataset(WithMetrics, DatasetBadgeMixin, Owned, db.Document):
824
824
  else:
825
825
  return self.last_update + delta
826
826
 
827
- @cached_property
827
+ @property
828
828
  def quality(self):
829
+ # `quality_cached` should always be set, except during the migration
830
+ # creating this property. We could remove `or self.compute_quality()`
831
+ # after the migration but since we need to keep the computed property for
832
+ # `update_fulfilled_in_time`, maybe we leave it here? Just in case?
833
+ quality = self.quality_cached or self.compute_quality()
834
+
835
+ # :UpdateFulfilledInTime
836
+ # `next_update_for_update_fulfilled_in_time` is only useful to compute the
837
+ # real `update_fulfilled_in_time` check, so we pop it to not pollute the `quality`
838
+ # object for users.
839
+ next_update = quality.pop("next_update_for_update_fulfilled_in_time", None)
840
+ if next_update:
841
+ # Allow for being one day late on update.
842
+ # We may have up to one day delay due to harvesting for example
843
+ quality["update_fulfilled_in_time"] = (next_update - datetime.utcnow()).days >= -1
844
+ elif self.frequency in ["continuous", "irregular", "punctual"]:
845
+ # For these frequencies, we don't expect regular updates or can't quantify them.
846
+ # Thus we consider the update_fulfilled_in_time quality criterion to be true.
847
+ quality["update_fulfilled_in_time"] = True
848
+
849
+ # Since `update_fulfilled_in_time` cannot be precomputed, `score` cannot either.
850
+ quality["score"] = self.compute_quality_score(quality)
851
+
852
+ return quality
853
+
854
+ def compute_quality(self):
829
855
  """Return a dict filled with metrics related to the inner
830
856
 
831
857
  quality of the dataset:
@@ -835,25 +861,18 @@ class Dataset(WithMetrics, DatasetBadgeMixin, Owned, db.Document):
835
861
  * and so on
836
862
  """
837
863
  result = {}
838
- if not self.id:
839
- # Quality is only relevant on saved Datasets
840
- return result
841
864
 
842
865
  result["license"] = True if self.license else False
843
866
  result["temporal_coverage"] = True if self.temporal_coverage else False
844
867
  result["spatial"] = True if self.spatial else False
845
868
 
846
869
  result["update_frequency"] = self.frequency and self.frequency != "unknown"
847
- if self.next_update:
848
- # Allow for being one day late on update.
849
- # We may have up to one day delay due to harvesting for example
850
- result["update_fulfilled_in_time"] = (
851
- True if (self.next_update - datetime.utcnow()).days >= -1 else False
852
- )
853
- elif self.frequency in ["continuous", "irregular", "punctual"]:
854
- # For these frequencies, we don't expect regular updates or can't quantify them.
855
- # Thus we consider the update_fulfilled_in_time quality criterion to be true.
856
- result["update_fulfilled_in_time"] = True
870
+
871
+ # We only save the next_update here because it is based on resources
872
+ # We cannot save the `update_fulfilled_in_time` because it is time
873
+ # sensitive (so setting it on save is not really useful…)
874
+ # See :UpdateFulfilledInTime
875
+ result["next_update_for_update_fulfilled_in_time"] = self.next_update
857
876
 
858
877
  result["dataset_description_quality"] = (
859
878
  True
@@ -876,7 +895,6 @@ class Dataset(WithMetrics, DatasetBadgeMixin, Owned, db.Document):
876
895
  resource_desc = True
877
896
  result["resources_documentation"] = resource_doc or resource_desc
878
897
 
879
- result["score"] = self.compute_quality_score(result)
880
898
  return result
881
899
 
882
900
  @property
@@ -934,8 +952,16 @@ class Dataset(WithMetrics, DatasetBadgeMixin, Owned, db.Document):
934
952
  if resource.id in [r.id for r in self.resources]:
935
953
  raise MongoEngineValidationError("Cannot add resource with already existing ID")
936
954
 
955
+ self.resources.insert(0, resource)
937
956
  self.update(
938
- __raw__={"$push": {"resources": {"$each": [resource.to_mongo()], "$position": 0}}}
957
+ __raw__={
958
+ "$set": {
959
+ "quality_cached": self.compute_quality(),
960
+ },
961
+ "$push": {
962
+ "resources": {"$each": [resource.to_mongo()], "$position": 0},
963
+ },
964
+ }
939
965
  )
940
966
  self.reload()
941
967
  self.on_resource_added.send(self.__class__, document=self, resource_id=resource.id)
udata/core/dataset/rdf.py CHANGED
@@ -5,7 +5,7 @@ This module centralize dataset helpers for RDF/DCAT serialization and parsing
5
5
  import calendar
6
6
  import json
7
7
  import logging
8
- from datetime import date
8
+ from datetime import date, datetime
9
9
  from typing import Optional
10
10
 
11
11
  from dateutil.parser import parse as parse_dt
@@ -50,7 +50,7 @@ from udata.rdf import (
50
50
  url_from_rdf,
51
51
  )
52
52
  from udata.uris import endpoint_for
53
- from udata.utils import get_by, safe_unicode
53
+ from udata.utils import get_by, safe_unicode, to_naive_datetime
54
54
 
55
55
  from .constants import OGC_SERVICE_FORMATS, UPDATE_FREQUENCIES
56
56
  from .models import Checksum, Dataset, License, Resource
@@ -735,7 +735,14 @@ def resource_from_rdf(graph_or_distrib, dataset=None, is_additionnal=False):
735
735
  if not resource.harvest:
736
736
  resource.harvest = HarvestResourceMetadata()
737
737
  resource.harvest.created_at = created_at
738
- resource.harvest.modified_at = modified_at
738
+
739
+ # In the past, we've encountered future `modified_at` values during harvesting;
740
+ # do not save them. :FutureHarvestModifiedAt
741
+ if modified_at and to_naive_datetime(modified_at) > datetime.utcnow():
742
+ log.warning(f"Future `DCT.modified` date '{modified_at}' in resource")
743
+ else:
744
+ resource.harvest.modified_at = modified_at
745
+
739
746
  resource.harvest.dct_identifier = identifier
740
747
  resource.harvest.uri = uri
741
748
 
@@ -836,7 +843,13 @@ def dataset_from_rdf(graph: Graph, dataset=None, node=None, remote_url_prefix: s
836
843
  dataset.harvest.uri = uri
837
844
  dataset.harvest.remote_url = remote_url
838
845
  dataset.harvest.created_at = created_at
839
- dataset.harvest.modified_at = modified_at
846
+
847
+ # In the past, we've encountered future `modified_at` values during harvesting;
848
+ # do not save them. :FutureHarvestModifiedAt
849
+ if modified_at and to_naive_datetime(modified_at) > datetime.utcnow():
850
+ log.warning(f"Future `DCT.modified` date '{modified_at}' in dataset")
851
+ else:
852
+ dataset.harvest.modified_at = modified_at
840
853
 
841
854
  return dataset
842
855
 
@@ -0,0 +1,25 @@
1
+ """
2
+ This migration re-saves every dataset so that its `quality_cached` field is computed and stored.
3
+ """
4
+
5
+ import logging
6
+
7
+ import click
8
+
9
+ from udata.core.dataset.models import Dataset
10
+
11
+ log = logging.getLogger(__name__)
12
+
13
+
14
+ def migrate(db):
15
+ log.info("Saving all datasets")
16
+
17
+ count = Dataset.objects().count()
18
+ with click.progressbar(Dataset.objects(), length=count) as datasets:
19
+ for dataset in datasets:
20
+ try:
21
+ dataset.save()
22
+ except Exception as err:
23
+ log.error(f"Cannot save dataset {dataset.id} {err}")
24
+
25
+ log.info("Done")
@@ -21,7 +21,7 @@ class DatasetAPIV2Test(APITestCase):
21
21
  resources_a = [ResourceFactory() for _ in range(2)]
22
22
  dataset_a = DatasetFactory(title="Dataset A", resources=resources_a)
23
23
 
24
- resources_b = [ResourceFactory() for _ in range(4)]
24
+ resources_b = [ResourceFactory(format="csv") for _ in range(4)]
25
25
  dataset_b = DatasetFactory(title="Dataset B", resources=resources_b)
26
26
 
27
27
  response = self.get(url_for("apiv2.datasets"))
@@ -32,6 +32,11 @@ class DatasetAPIV2Test(APITestCase):
32
32
  assert data["data"][1]["title"] == dataset_a.title
33
33
  assert data["data"][0]["title"] == dataset_b.title
34
34
 
35
+ assert data["data"][1]["quality"]["has_resources"]
36
+ assert not data["data"][1]["quality"]["has_open_format"]
37
+ assert data["data"][0]["quality"]["has_resources"]
38
+ assert data["data"][0]["quality"]["has_open_format"]
39
+
35
40
  assert data["data"][1]["resources"]["total"] == len(resources_a)
36
41
  assert data["data"][0]["resources"]["total"] == len(resources_b)
37
42
 
@@ -45,6 +50,7 @@ class DatasetAPIV2Test(APITestCase):
45
50
  response = self.get(url_for("apiv2.dataset", dataset=dataset))
46
51
  self.assert200(response)
47
52
  data = response.json
53
+ assert data["quality"]["has_resources"]
48
54
  assert data["resources"]["rel"] == "subsection"
49
55
  assert data["resources"]["href"] == url_for(
50
56
  "apiv2.resources",
@@ -748,16 +748,6 @@ class HarvestMetadataTest:
748
748
  dataset.harvest = harvest_metadata
749
749
  dataset.save()
750
750
 
751
- def test_harvest_dataset_metadata_future_modifed_at(self):
752
- dataset = DatasetFactory()
753
-
754
- harvest_metadata = HarvestDatasetMetadata(
755
- created_at=datetime.utcnow(), modified_at=datetime.utcnow() + timedelta(days=1)
756
- )
757
- dataset.harvest = harvest_metadata
758
- dataset.save()
759
- assert dataset.last_modified == dataset.last_modified_internal
760
-
761
751
  def test_harvest_dataset_metadata_past_modifed_at(self):
762
752
  dataset = DatasetFactory()
763
753
 
@@ -483,6 +483,24 @@ class RdfToDatasetTest:
483
483
  assert isinstance(dataset, Dataset)
484
484
  assert dataset.description == "a description"
485
485
 
486
+ def test_future_modified_at(self):
487
+ node = BNode()
488
+ g = Graph()
489
+
490
+ modified = faker.future_datetime()
491
+
492
+ g.add((node, RDF.type, DCAT.Dataset))
493
+ g.add((node, DCT.identifier, Literal(faker.uuid4())))
494
+ g.add((node, DCT.title, Literal(faker.sentence())))
495
+ g.add((node, DCT.description, Literal("<div>a description</div>")))
496
+ g.add((node, DCT.modified, Literal(modified)))
497
+
498
+ dataset = dataset_from_rdf(g)
499
+ dataset.validate()
500
+
501
+ assert isinstance(dataset, Dataset)
502
+ assert dataset.harvest.modified_at is None
503
+
486
504
  def test_theme_and_tags(self):
487
505
  node = BNode()
488
506
  g = Graph()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: udata
3
- Version: 10.2.1.dev34728
3
+ Version: 10.2.1.dev34761
4
4
  Summary: Open data portal
5
5
  Home-page: https://github.com/opendatateam/udata
6
6
  Author: Opendata Team
@@ -143,6 +143,7 @@ It is collectively taken care of by members of the
143
143
 
144
144
  - Skip empty datasets nodes referenced in dataservices before creating an harvest job item [#3285](https://github.com/opendatateam/udata/pull/3285)
145
145
  - Harvest Distribution with a DCAT.accessService property as type `api` [#3294](https://github.com/opendatateam/udata/pull/3294)
146
+ - Compute quality score to show correct resources score in api v2 [#3290](https://github.com/opendatateam/udata/pull/3290/)
146
147
 
147
148
  ## 10.2.0 (2025-04-02)
148
149
 
@@ -103,10 +103,10 @@ udata/core/dataset/events.py,sha256=bSM0nFEX14r4JHc-bAM-7OOuD3JAxUIpw9GgXbOsUyw,
103
103
  udata/core/dataset/exceptions.py,sha256=uKiayLSpSzsnLvClObS6hOO0qXEqvURKN7_w8eimQNU,498
104
104
  udata/core/dataset/factories.py,sha256=fRDWDlybR_ud4pDs1-ntWuYHKtV9LMHeBOBp2SmTT6M,9006
105
105
  udata/core/dataset/forms.py,sha256=nGHC92MH3NmyfASuYRgoBzyNTpsWL_1h3R7B9t7nnxU,6295
106
- udata/core/dataset/models.py,sha256=GurpAR_IyPqqp9l72IB0EQMafInYigE0iEU_h7CpCV0,39594
106
+ udata/core/dataset/models.py,sha256=2c6vjr5KPq6Pqg6MK5pRFrXzpTIXtRC2zRxDlxiFTL8,40834
107
107
  udata/core/dataset/permissions.py,sha256=zXQ6kU-Ni3Pl5tDtat-ZPupug9InsNeCN7xRLc2Vcrc,1097
108
108
  udata/core/dataset/preview.py,sha256=IwCqiNTjjXbtA_SSKF52pwnzKKEz0GyYM95QNn2Dkog,2561
109
- udata/core/dataset/rdf.py,sha256=-XsXa7uN1Ovt1RjC_-LYewTAMMRxG270nEocMeBk_i0,31176
109
+ udata/core/dataset/rdf.py,sha256=HkjzcWgq9AfPvUGMRI7-ufRrgnlfBmP8crbgRhg6Lz4,31789
110
110
  udata/core/dataset/search.py,sha256=E7LqHBnq3sMefvmLwTpiw-Ovem2a3NJswHesRjctboE,5627
111
111
  udata/core/dataset/signals.py,sha256=WN4sV-lJlNsRkhcnhoy0SYJvCoYmK_5QFYZd1u-h4gs,161
112
112
  udata/core/dataset/tasks.py,sha256=jG6U7cwGywhLK-MqUrvbZ1WLl_NLApiThtxmsckGzP8,10009
@@ -359,6 +359,7 @@ udata/migrations/2024-10-30-rename-organization-badges.py,sha256=Yu8PTayYY0m4YJJ
359
359
  udata/migrations/2024-11-19-keep-only-local_authority-if-also-public_service-organization-badges.py,sha256=xFn0nVqAh8Vee9-s8YFZHg5F-kukOr5B4fyz62G40eA,658
360
360
  udata/migrations/2024-12-05-contact-point-is-now-a-list.py,sha256=il2qSFhOTq-YhqhFaq_5OwysUlKXaK_En-qGP6v9rf0,1065
361
361
  udata/migrations/2025-01-05-dataservices-fields-changes.py,sha256=HlqHg3sG3rk3sYVrOwAlXMNhTmTKd1YT82P-gXOqmZM,4647
362
+ udata/migrations/2025-03-20-save-quality-for-datasets.py,sha256=FPTfGVByXSHr18V4RFlktC7t-H-5rgEcZQMTRpMrGqo,607
362
363
  udata/migrations/__init__.py,sha256=RBCBDaTlLjuMs_Qzwji6Z6T4r7FCGXhESKoxQbT5qAA,11221
363
364
  udata/models/__init__.py,sha256=txbZwa-lRG3mq99eQ9E5YcFWiNUdjDVSyJJvlqUMFfs,1413
364
365
  udata/mongo/__init__.py,sha256=y4Rv-kq3o_kcEulcNpePLzocXPBNpx3Jd82G-VZPaMc,1421
@@ -634,7 +635,7 @@ udata/tests/api/test_topics_api.py,sha256=mpIjztDEQ2fx25E9dl4eIARGqtYuh1w8x68PhR
634
635
  udata/tests/api/test_transfer_api.py,sha256=-OLv-KjyLZL14J8UHl-ak_sYUj6wFiZWyoXC2SMXmEQ,7503
635
636
  udata/tests/api/test_user_api.py,sha256=hfDHOv5F81tehbc5u3dYH5rAyYgzgHGCWVcGR3kLIZw,16591
636
637
  udata/tests/apiv2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
637
- udata/tests/apiv2/test_datasets.py,sha256=3M4pEvxQS1bCzP4Cje-Sj-1QpkR8q3EQnJDD4w15Dpk,21668
638
+ udata/tests/apiv2/test_datasets.py,sha256=zjTtjY7lJKOOjiV698RqGvRMuuMxY2dMhEcnhOXWUQg,21973
638
639
  udata/tests/apiv2/test_me_api.py,sha256=RAgu5iKGB3Pp8cDN-427ovly-VOz5MTL4GpPZ1NcVXI,1418
639
640
  udata/tests/apiv2/test_organizations.py,sha256=os_43s-coSRqjgY-5fAjSiRlB3g2685u7d-Es0aOhks,6390
640
641
  udata/tests/apiv2/test_swagger.py,sha256=RKedaq-2UeyEuxlmUaAN7pmEe-lQYYmpDUVc8HF3CH4,785
@@ -652,8 +653,8 @@ udata/tests/dataset/test_csv_adapter.py,sha256=CSAHEur5-183fRsD_cBgsHnLuohn0DOb9
652
653
  udata/tests/dataset/test_dataset_actions.py,sha256=bgDjVYjOvu3sX_FCTCzf2snZYSprsqor2nAhIVuokSs,722
653
654
  udata/tests/dataset/test_dataset_commands.py,sha256=zMPJG2wYwKBee2zI65kmboxf59Zqa84DDjT8V5wj9uo,801
654
655
  udata/tests/dataset/test_dataset_events.py,sha256=hlrpoOiBbnX_COUI9Pzdqlp45GZZDqu5piwupbnPiTI,3601
655
- udata/tests/dataset/test_dataset_model.py,sha256=uz2oNuzL6tEqd34JrBP0J1BVlt8JQoAB_u3tYM9etPg,32407
656
- udata/tests/dataset/test_dataset_rdf.py,sha256=1eBbn7rl2bcHGaJoxXMnJpszteShXvVg3djEw6HxOUw,40721
656
+ udata/tests/dataset/test_dataset_model.py,sha256=YX9ELWA6q2tW3OEI_rDS7BPfE2p5ocA5pB9_2Yyy8Ac,32017
657
+ udata/tests/dataset/test_dataset_rdf.py,sha256=fWMfhJvxmRKPw7cdvksS-sJBJT4G9DepDIm5RTdvrJ4,41310
657
658
  udata/tests/dataset/test_dataset_tasks.py,sha256=n1W2Pg0ez02d66zQG3N93kh7dpR2yLMRDqUI6PnPaI0,3088
658
659
  udata/tests/dataset/test_resource_preview.py,sha256=fp9mSL7unhyM66GR0gwhgX3OGQ4TJt7G9xU-CjsL3HI,3908
659
660
  udata/tests/features/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -720,9 +721,9 @@ udata/translations/pt/LC_MESSAGES/udata.mo,sha256=xmSQ7RTl9XV24Vq3A7f_NIRDrOJwbs
720
721
  udata/translations/pt/LC_MESSAGES/udata.po,sha256=PXrrhfIjZxheUotQan9VVh1fvNHSzeMAitaouIHaR7U,46793
721
722
  udata/translations/sr/LC_MESSAGES/udata.mo,sha256=1pAf_rXvbOoO_jjZmH77GbzvdT_YtPTJKFumMnMto2g,29169
722
723
  udata/translations/sr/LC_MESSAGES/udata.po,sha256=AalMHaFLZobKmuAnZ4X1rQtk46NdW2rktMFQHD5DTcM,53768
723
- udata-10.2.1.dev34728.dist-info/LICENSE,sha256=V8j_M8nAz8PvAOZQocyRDX7keai8UJ9skgmnwqETmdY,34520
724
- udata-10.2.1.dev34728.dist-info/METADATA,sha256=X_iyv6wTV8PL7AqcqQYNU43ISlJ08S8OpCDrFbm1n98,143896
725
- udata-10.2.1.dev34728.dist-info/WHEEL,sha256=Kh9pAotZVRFj97E15yTA4iADqXdQfIVTHcNaZTjxeGM,110
726
- udata-10.2.1.dev34728.dist-info/entry_points.txt,sha256=3SKiqVy4HUqxf6iWspgMqH8d88Htk6KoLbG1BU-UddQ,451
727
- udata-10.2.1.dev34728.dist-info/top_level.txt,sha256=39OCg-VWFWOq4gCKnjKNu-s3OwFlZIu_dVH8Gl6ndHw,12
728
- udata-10.2.1.dev34728.dist-info/RECORD,,
724
+ udata-10.2.1.dev34761.dist-info/LICENSE,sha256=V8j_M8nAz8PvAOZQocyRDX7keai8UJ9skgmnwqETmdY,34520
725
+ udata-10.2.1.dev34761.dist-info/METADATA,sha256=T6dSfDKTVv_SfF8njP7sDL6RhZeoGgjVzqD1EMODnSM,144020
726
+ udata-10.2.1.dev34761.dist-info/WHEEL,sha256=Kh9pAotZVRFj97E15yTA4iADqXdQfIVTHcNaZTjxeGM,110
727
+ udata-10.2.1.dev34761.dist-info/entry_points.txt,sha256=3SKiqVy4HUqxf6iWspgMqH8d88Htk6KoLbG1BU-UddQ,451
728
+ udata-10.2.1.dev34761.dist-info/top_level.txt,sha256=39OCg-VWFWOq4gCKnjKNu-s3OwFlZIu_dVH8Gl6ndHw,12
729
+ udata-10.2.1.dev34761.dist-info/RECORD,,