udata 11.1.2.dev9__py3-none-any.whl → 11.1.2.dev10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of udata might be problematic. Click here for more details.

Files changed (36) hide show
  1. udata/core/dataset/api.py +2 -2
  2. udata/core/dataset/api_fields.py +3 -4
  3. udata/core/dataset/apiv2.py +6 -6
  4. udata/core/dataset/constants.py +124 -38
  5. udata/core/dataset/factories.py +2 -1
  6. udata/core/dataset/forms.py +14 -10
  7. udata/core/dataset/models.py +8 -36
  8. udata/core/dataset/rdf.py +76 -54
  9. udata/core/dataset/tasks.py +4 -9
  10. udata/harvest/backends/ckan/harvesters.py +10 -14
  11. udata/harvest/backends/maaf.py +15 -14
  12. udata/harvest/tests/ckan/test_ckan_backend.py +4 -3
  13. udata/harvest/tests/test_dcat_backend.py +3 -2
  14. udata/migrations/2025-09-04-update-legacy-frequencies.py +36 -0
  15. udata/static/chunks/{11.55ab79044cda0271b595.js → 11.51d706fb9521c16976bc.js} +3 -3
  16. udata/static/chunks/{11.55ab79044cda0271b595.js.map → 11.51d706fb9521c16976bc.js.map} +1 -1
  17. udata/static/chunks/{13.d9c1735d14038b94c17e.js → 13.f29411b06be1883356a3.js} +2 -2
  18. udata/static/chunks/{13.d9c1735d14038b94c17e.js.map → 13.f29411b06be1883356a3.js.map} +1 -1
  19. udata/static/chunks/{17.81c57c0dedf812e43013.js → 17.3bd0340930d4a314ce9c.js} +2 -2
  20. udata/static/chunks/{17.81c57c0dedf812e43013.js.map → 17.3bd0340930d4a314ce9c.js.map} +1 -1
  21. udata/static/chunks/{19.3e0e8651d948e04b8cf2.js → 19.ba0bb2baa40e899d440b.js} +3 -3
  22. udata/static/chunks/{19.3e0e8651d948e04b8cf2.js.map → 19.ba0bb2baa40e899d440b.js.map} +1 -1
  23. udata/static/chunks/{8.494b003a94383b142c18.js → 8.b966402f5d680d4bdf4a.js} +2 -2
  24. udata/static/chunks/{8.494b003a94383b142c18.js.map → 8.b966402f5d680d4bdf4a.js.map} +1 -1
  25. udata/static/common.js +1 -1
  26. udata/static/common.js.map +1 -1
  27. udata/tests/api/test_datasets_api.py +41 -12
  28. udata/tests/dataset/test_dataset_model.py +17 -53
  29. udata/tests/dataset/test_dataset_rdf.py +27 -28
  30. udata/translations/udata.pot +226 -150
  31. {udata-11.1.2.dev9.dist-info → udata-11.1.2.dev10.dist-info}/METADATA +1 -1
  32. {udata-11.1.2.dev9.dist-info → udata-11.1.2.dev10.dist-info}/RECORD +36 -35
  33. {udata-11.1.2.dev9.dist-info → udata-11.1.2.dev10.dist-info}/WHEEL +0 -0
  34. {udata-11.1.2.dev9.dist-info → udata-11.1.2.dev10.dist-info}/entry_points.txt +0 -0
  35. {udata-11.1.2.dev9.dist-info → udata-11.1.2.dev10.dist-info}/licenses/LICENSE +0 -0
  36. {udata-11.1.2.dev9.dist-info → udata-11.1.2.dev10.dist-info}/top_level.txt +0 -0
udata/core/dataset/api.py CHANGED
@@ -64,7 +64,7 @@ from .api_fields import (
64
64
  upload_community_fields,
65
65
  upload_fields,
66
66
  )
67
- from .constants import RESOURCE_TYPES, UPDATE_FREQUENCIES
67
+ from .constants import RESOURCE_TYPES, UpdateFrequency
68
68
  from .exceptions import (
69
69
  SchemasCacheUnavailableException,
70
70
  SchemasCatalogNotFoundException,
@@ -890,7 +890,7 @@ class FrequenciesAPI(API):
890
890
  @api.marshal_list_with(frequency_fields)
891
891
  def get(self):
892
892
  """List all available frequencies"""
893
- return [{"id": id, "label": label} for id, label in UPDATE_FREQUENCIES.items()]
893
+ return [{"id": f.id, "label": f.label} for f in UpdateFrequency]
894
894
 
895
895
 
896
896
  @ns.route("/extensions/", endpoint="allowed_extensions")
@@ -9,11 +9,10 @@ from udata.core.user.api_fields import user_ref_fields
9
9
  from .constants import (
10
10
  CHECKSUM_TYPES,
11
11
  DEFAULT_CHECKSUM_TYPE,
12
- DEFAULT_FREQUENCY,
13
12
  DEFAULT_LICENSE,
14
13
  RESOURCE_FILETYPES,
15
14
  RESOURCE_TYPES,
16
- UPDATE_FREQUENCIES,
15
+ UpdateFrequency,
17
16
  )
18
17
 
19
18
  checksum_fields = api.model(
@@ -361,8 +360,8 @@ dataset_fields = api.model(
361
360
  "frequency": fields.String(
362
361
  description="The update frequency",
363
362
  required=True,
364
- enum=list(UPDATE_FREQUENCIES),
365
- default=DEFAULT_FREQUENCY,
363
+ enum=list(UpdateFrequency),
364
+ default=UpdateFrequency.UNKNOWN,
366
365
  ),
367
366
  "frequency_date": fields.ISODateTime(
368
367
  description=(
@@ -30,7 +30,7 @@ from .api_fields import (
30
30
  temporal_coverage_fields,
31
31
  user_ref_fields,
32
32
  )
33
- from .constants import DEFAULT_FREQUENCY, DEFAULT_LICENSE, FULL_OBJECTS_HEADER, UPDATE_FREQUENCIES
33
+ from .constants import DEFAULT_LICENSE, FULL_OBJECTS_HEADER, UpdateFrequency
34
34
  from .models import CommunityResource, Dataset
35
35
  from .search import DatasetSearch
36
36
 
@@ -157,13 +157,13 @@ dataset_fields = apiv2.model(
157
157
  ),
158
158
  "frequency": fields.Raw(
159
159
  attribute=lambda d: {
160
- "id": d.frequency or DEFAULT_FREQUENCY,
161
- "label": UPDATE_FREQUENCIES.get(d.frequency or DEFAULT_FREQUENCY),
160
+ "id": (d.frequency or UpdateFrequency.UNKNOWN).id,
161
+ "label": (d.frequency or UpdateFrequency.UNKNOWN).label,
162
162
  }
163
163
  if request.headers.get(FULL_OBJECTS_HEADER, False, bool)
164
- else d.frequency,
165
- enum=list(UPDATE_FREQUENCIES),
166
- default=DEFAULT_FREQUENCY,
164
+ else (d.frequency or UpdateFrequency.UNKNOWN),
165
+ enum=list(UpdateFrequency),
166
+ default=UpdateFrequency.UNKNOWN,
167
167
  required=True,
168
168
  description="The update frequency (full Frequency object if `X-Get-Datasets-Full-Objects` is set, ID of the frequency otherwise)",
169
169
  ),
@@ -1,48 +1,134 @@
1
1
  from collections import OrderedDict
2
+ from datetime import datetime, timedelta
3
+ from enum import StrEnum, auto
4
+
5
+ from flask_babel import LazyString
2
6
 
3
7
  from udata.i18n import lazy_gettext as _
4
8
 
5
- #: Udata frequencies with their labels
6
- #:
7
- #: See: http://dublincore.org/groups/collections/frequency/
8
- UPDATE_FREQUENCIES = OrderedDict(
9
- [ # Dublin core equivalent
10
- ("unknown", _("Unknown")), # N/A
11
- ("punctual", _("Punctual")), # N/A
12
- ("continuous", _("Real time")), # freq:continuous
13
- ("hourly", _("Hourly")), # N/A
14
- ("fourTimesADay", _("Four times a day")), # N/A
15
- ("threeTimesADay", _("Three times a day")), # N/A
16
- ("semidaily", _("Semidaily")), # N/A
17
- ("daily", _("Daily")), # freq:daily
18
- ("fourTimesAWeek", _("Four times a week")), # N/A
19
- ("threeTimesAWeek", _("Three times a week")), # freq:threeTimesAWeek
20
- ("semiweekly", _("Semiweekly")), # freq:semiweekly
21
- ("weekly", _("Weekly")), # freq:weekly
22
- ("biweekly", _("Biweekly")), # freq:bimonthly
23
- ("threeTimesAMonth", _("Three times a month")), # freq:threeTimesAMonth
24
- ("semimonthly", _("Semimonthly")), # freq:semimonthly
25
- ("monthly", _("Monthly")), # freq:monthly
26
- ("bimonthly", _("Bimonthly")), # freq:bimonthly
27
- ("quarterly", _("Quarterly")), # freq:quarterly
28
- ("threeTimesAYear", _("Three times a year")), # freq:threeTimesAYear
29
- ("semiannual", _("Biannual")), # freq:semiannual
30
- ("annual", _("Annual")), # freq:annual
31
- ("biennial", _("Biennial")), # freq:biennial
32
- ("triennial", _("Triennial")), # freq:triennial
33
- ("quinquennial", _("Quinquennial")), # N/A
34
- ("irregular", _("Irregular")), # freq:irregular
35
- ]
36
- )
37
9
 
38
- #: Map legacy frequencies to currents
39
- LEGACY_FREQUENCIES = {
40
- "fortnighly": "biweekly",
41
- "biannual": "semiannual",
42
- "realtime": "continuous",
10
+ class UpdateFrequency(StrEnum):
11
+ """
12
+ Udata frequency vocabulary
13
+
14
+ Based on the following vocabularies:
15
+ - DC: http://dublincore.org/groups/collections/frequency/
16
+ - EU: https://publications.europa.eu/en/web/eu-vocabularies/at-dataset/-/resource/dataset/frequency
17
+ """
18
+
19
+ CONTINUOUS = auto(), _("Real time"), None # DC, EU:UPDATE_CONT
20
+ ONE_MINUTE = "oneMinute", _("Every minute"), timedelta(minutes=1) # EU:1MIN
21
+ FIVE_MINUTES = "fiveMinutes", _("Every five minutes"), timedelta(minutes=5) # EU:5MIN
22
+ TEN_MINUTES = "tenMinutes", _("Every ten minutes"), timedelta(minutes=10) # EU:10MIN
23
+ FIFTEEN_MINUTES = (
24
+ "fifteenMinutes",
25
+ _("Every fifteen minutes"),
26
+ timedelta(minutes=15),
27
+ ) # EU:15MIN
28
+ THIRTY_MINUTES = "thirtyMinutes", _("Every thirty minute"), timedelta(minutes=30) # EU:30MIN
29
+ HOURLY = auto(), _("Every hour"), timedelta(hours=1) # EU
30
+ BIHOURLY = auto(), _("Every two hours"), timedelta(hours=2) # EU
31
+ TRIHOURLY = auto(), _("Every three hours"), timedelta(hours=3) # EU
32
+ TWELVE_HOURS = "twelveHours", _("Every twelve hours"), timedelta(hours=12) # EU:12HRS
33
+ SEVERAL_TIMES_A_DAY = "severalTimesADay", _("Several times a day"), timedelta(days=1) # EU:CONT
34
+ THREE_TIMES_A_DAY = "threeTimesADay", _("Three times a day"), timedelta(days=1) # EU:DAILY_3
35
+ SEMIDAILY = auto(), _("Twice a day"), timedelta(days=1) # EU:DAILY_2
36
+ DAILY = auto(), _("Daily"), timedelta(days=1) # DC, EU
37
+ FIVE_TIMES_A_WEEK = "fiveTimesAWeek", _("Five times a week"), timedelta(weeks=1) # EU:WEEKLY_5
38
+ THREE_TIMES_A_WEEK = (
39
+ "threeTimesAWeek",
40
+ _("Three times a week"),
41
+ timedelta(weeks=1),
42
+ ) # DC, EU:WEEKLY_3
43
+ SEMIWEEKLY = auto(), _("Twice a week"), timedelta(weeks=1) # DC, EU:WEEKLY_2
44
+ WEEKLY = auto(), _("Weekly"), timedelta(weeks=1) # DC, EU
45
+ BIWEEKLY = auto(), _("Every two weeks"), timedelta(weeks=2) # DC, EU
46
+ THREE_TIMES_A_MONTH = (
47
+ "threeTimesAMonth",
48
+ _("Three times a month"),
49
+ timedelta(days=31),
50
+ ) # DC, EU:MONTHLY_3
51
+ SEMIMONTHLY = auto(), _("Twice a month"), timedelta(days=31) # DC, EU:MONTHLY_2
52
+ MONTHLY = auto(), _("Monthly"), timedelta(days=31) # DC, EU
53
+ BIMONTHLY = auto(), _("Every two months"), timedelta(days=31 * 2) # DC, EU
54
+ QUARTERLY = auto(), _("Quarterly"), timedelta(days=31 * 3) # DC, EU
55
+ THREE_TIMES_A_YEAR = (
56
+ "threeTimesAYear",
57
+ _("Three times a year"),
58
+ timedelta(days=365),
59
+ ) # DC, EU:ANNUAL_3
60
+ SEMIANNUAL = auto(), _("Twice a year"), timedelta(days=365) # DC, EU:ANNUAL_2
61
+ ANNUAL = auto(), _("Annually"), timedelta(days=365) # DC, EU
62
+ BIENNIAL = auto(), _("Every two years"), timedelta(days=365 * 2) # DC, EU
63
+ TRIENNIAL = auto(), _("Every three years"), timedelta(days=365 * 3) # DC, EU
64
+ QUADRENNIAL = auto(), _("Every four years"), timedelta(days=365 * 4) # EU
65
+ QUINQUENNIAL = auto(), _("Every five years"), timedelta(days=365 * 5) # EU
66
+ DECENNIAL = auto(), _("Every ten years"), timedelta(days=365 * 10) # EU
67
+ BIDECENNIAL = auto(), _("Every twenty years"), timedelta(days=365 * 20) # EU
68
+ TRIDECENNIAL = auto(), _("Every thirty years"), timedelta(days=365 * 30) # EU
69
+ PUNCTUAL = auto(), _("Punctual"), None # EU:AS_NEEDED
70
+ IRREGULAR = auto(), _("Irregular"), None # DC, EU:IRREG
71
+ NEVER = auto(), _("Never"), None # EU
72
+ NOT_PLANNED = "notPlanned", _("Not planned"), None # EU:NOT_PLANNED
73
+ OTHER = auto(), _("Other"), None # EU
74
+ UNKNOWN = auto(), _("Unknown"), None # EU
75
+
76
+ def __new__(cls, id: str, label: LazyString, delta: timedelta | None):
77
+ # Set _value_ so the enum value-based lookup depends only on the id field.
78
+ # See https://docs.python.org/3/howto/enum.html#when-to-use-new-vs-init
79
+ obj = str.__new__(cls, id)
80
+ obj._value_ = id
81
+ obj._label = label # type: ignore[misc]
82
+ obj._delta = delta # type: ignore[misc]
83
+ return obj
84
+
85
+ @classmethod
86
+ def _missing_(cls, value) -> "UpdateFrequency | None":
87
+ if isinstance(value, str):
88
+ return UpdateFrequency._LEGACY_FREQUENCIES.get(value) # type: ignore[misc]
89
+
90
+ @property
91
+ def id(self) -> str:
92
+ return self.value
93
+
94
+ @property
95
+ def label(self) -> LazyString:
96
+ return self._label # type: ignore[misc]
97
+
98
+ @property
99
+ def delta(self) -> timedelta | None:
100
+ return self._delta # type: ignore[misc]
101
+
102
+ def next_update(self, last_update: datetime) -> datetime | None:
103
+ return last_update + self.delta if self.delta else None
104
+
105
+
106
+ # We must declare UpdateFrequency class variables after the Enum magic
107
+ # happens, so outside of class declaration.
108
+ #
109
+ # The alternative method based on _ignore_ breaks accessing the class
110
+ # variables from outside the class, because accesses to them will go
111
+ # through __getattr__ as if it were an Enum entry.
112
+ #
113
+ # FIXME(python 3.13+): Use Enum._add_value_alias_ instead:
114
+ #
115
+ # UNKNOWN = auto(), _("Unknown"), None, []
116
+ # CONTINUOUS = auto(), _("Real time"), None, ["realtime"]
117
+ # SEVERAL_TIMES_A_DAY = "severalTimesADay", ..., ["fourTimesADay"]
118
+ #
119
+ # def __new__(cls, id: str, ..., aliases: list[str]):
120
+ # ...
121
+ # for alias in aliases:
122
+ # obj._add_value_alias_(alias)
123
+ #
124
+ UpdateFrequency._LEGACY_FREQUENCIES = { # type: ignore[misc]
125
+ "realtime": UpdateFrequency.CONTINUOUS,
126
+ "fourTimesADay": UpdateFrequency.SEVERAL_TIMES_A_DAY,
127
+ "fourTimesAWeek": UpdateFrequency.OTHER,
128
+ "fortnighly": UpdateFrequency.BIWEEKLY,
129
+ "biannual": UpdateFrequency.SEMIANNUAL,
43
130
  }
44
131
 
45
- DEFAULT_FREQUENCY = "unknown"
46
132
 
47
133
  DEFAULT_LICENSE = {
48
134
  "id": "notspecified",
@@ -8,6 +8,7 @@ from udata.core.organization.factories import OrganizationFactory
8
8
  from udata.core.spatial.factories import SpatialCoverageFactory
9
9
  from udata.factories import ModelFactory
10
10
 
11
+ from .constants import UpdateFrequency
11
12
  from .models import Checksum, CommunityResource, Dataset, License, Resource
12
13
 
13
14
 
@@ -17,7 +18,7 @@ class DatasetFactory(ModelFactory):
17
18
 
18
19
  title = factory.Faker("sentence")
19
20
  description = factory.Faker("text")
20
- frequency = "unknown"
21
+ frequency = UpdateFrequency.UNKNOWN
21
22
  resources = factory.LazyAttribute(lambda o: ResourceFactory.build_batch(o.nb_resources))
22
23
 
23
24
  class Params:
@@ -6,14 +6,12 @@ from udata.mongo.errors import FieldValidationError
6
6
 
7
7
  from .constants import (
8
8
  CHECKSUM_TYPES,
9
- DEFAULT_FREQUENCY,
10
9
  DESCRIPTION_SHORT_SIZE_LIMIT,
11
10
  DESCRIPTION_SIZE_LIMIT,
12
- LEGACY_FREQUENCIES,
13
11
  RESOURCE_FILETYPES,
14
12
  RESOURCE_TYPES,
15
13
  TITLE_SIZE_LIMIT,
16
- UPDATE_FREQUENCIES,
14
+ UpdateFrequency,
17
15
  )
18
16
  from .models import (
19
17
  Checksum,
@@ -117,10 +115,11 @@ class CommunityResourceForm(BaseResourceForm):
117
115
  organization = fields.PublishAsField(_("Publish as"))
118
116
 
119
117
 
120
- def map_legacy_frequencies(form, field):
121
- """Map legacy frequencies to new ones"""
122
- if field.data in LEGACY_FREQUENCIES:
123
- field.data = LEGACY_FREQUENCIES[field.data]
118
+ def unmarshal_frequency(form, field):
119
+ # We don't need to worry about invalid field.data being fed to UpdateFrequency here,
120
+ # since the API will already have ensured incoming data matches the field definition,
121
+ # which in our case is an enum of valid UpdateFrequency values.
122
+ field.data = UpdateFrequency(field.data)
124
123
 
125
124
 
126
125
  def validate_contact_point(form, field):
@@ -160,10 +159,15 @@ class DatasetForm(ModelForm):
160
159
  license = fields.ModelSelectField(_("License"), model=License, allow_blank=True)
161
160
  frequency = fields.SelectField(
162
161
  _("Update frequency"),
163
- choices=list(UPDATE_FREQUENCIES.items()),
164
- default=DEFAULT_FREQUENCY,
162
+ choices=list(UpdateFrequency),
163
+ default=UpdateFrequency.UNKNOWN,
165
164
  validators=[validators.optional()],
166
- preprocessors=[map_legacy_frequencies],
165
+ # Unmarshaling should not happen during validation, but flask-restx makes it cumbersome
166
+ # to do it earlier, requiring a request parser (unmarshaler) separate from the marshaler,
167
+ # meaning we can't use the same object for @api.expect and @api.marshal_with.
168
+ # This should get better once flask-restx moves to something like marshmallow, which
169
+ # handles marshaling/unmarshaling more symmetrically and in the same object.
170
+ preprocessors=[unmarshal_frequency],
167
171
  description=_("The frequency at which data are updated."),
168
172
  )
169
173
  frequency_date = fields.DateTimeField(_("Expected frequency date"))
@@ -38,7 +38,6 @@ from .constants import (
38
38
  DESCRIPTION_SHORT_SIZE_LIMIT,
39
39
  HVD,
40
40
  INSPIRE,
41
- LEGACY_FREQUENCIES,
42
41
  MAX_DISTANCE,
43
42
  PIVOTAL_DATA,
44
43
  RESOURCE_FILETYPES,
@@ -47,7 +46,7 @@ from .constants import (
47
46
  SL,
48
47
  SPD,
49
48
  SR,
50
- UPDATE_FREQUENCIES,
49
+ UpdateFrequency,
51
50
  )
52
51
  from .exceptions import (
53
52
  SchemasCacheUnavailableException,
@@ -580,7 +579,8 @@ class Dataset(Auditable, WithMetrics, DatasetBadgeMixin, Owned, Linkable, db.Doc
580
579
  resources = field(db.ListField(db.EmbeddedDocumentField(Resource)), auditable=False)
581
580
 
582
581
  private = field(db.BooleanField(default=False))
583
- frequency = field(db.StringField(choices=list(UPDATE_FREQUENCIES.keys())))
582
+
583
+ frequency = field(db.EnumField(UpdateFrequency))
584
584
  frequency_date = field(db.DateTimeField(verbose_name=_("Future date of update")))
585
585
  temporal_coverage = field(db.EmbeddedDocumentField(db.DateRange))
586
586
  spatial = field(db.EmbeddedDocumentField(SpatialCoverage))
@@ -703,8 +703,6 @@ class Dataset(Auditable, WithMetrics, DatasetBadgeMixin, Owned, Linkable, db.Doc
703
703
 
704
704
  def clean(self):
705
705
  super(Dataset, self).clean()
706
- if self.frequency in LEGACY_FREQUENCIES:
707
- self.frequency = LEGACY_FREQUENCIES[self.frequency]
708
706
 
709
707
  if len(set(res.id for res in self.resources)) != len(self.resources):
710
708
  raise MongoEngineValidationError(f"Duplicate resource ID in dataset #{self.id}.")
@@ -782,8 +780,8 @@ class Dataset(Auditable, WithMetrics, DatasetBadgeMixin, Owned, Linkable, db.Doc
782
780
  return self.owner.avatar.url
783
781
 
784
782
  @property
785
- def frequency_label(self):
786
- return UPDATE_FREQUENCIES.get(self.frequency or "unknown", UPDATE_FREQUENCIES["unknown"])
783
+ def has_frequency(self):
784
+ return self.frequency not in [None, UpdateFrequency.UNKNOWN]
787
785
 
788
786
  def check_availability(self):
789
787
  """Check if resources from that dataset are available.
@@ -835,33 +833,7 @@ class Dataset(Auditable, WithMetrics, DatasetBadgeMixin, Owned, Linkable, db.Doc
835
833
  Ex: the next update for a threeTimesAday freq is not
836
834
  every 8 hours, but is maximum 24 hours later.
837
835
  """
838
- delta = None
839
- if self.frequency == "hourly":
840
- delta = timedelta(hours=1)
841
- elif self.frequency in ["fourTimesADay", "threeTimesADay", "semidaily", "daily"]:
842
- delta = timedelta(days=1)
843
- elif self.frequency in ["fourTimesAWeek", "threeTimesAWeek", "semiweekly", "weekly"]:
844
- delta = timedelta(weeks=1)
845
- elif self.frequency == "biweekly":
846
- delta = timedelta(weeks=2)
847
- elif self.frequency in ["threeTimesAMonth", "semimonthly", "monthly"]:
848
- delta = timedelta(days=31)
849
- elif self.frequency == "bimonthly":
850
- delta = timedelta(days=31 * 2)
851
- elif self.frequency == "quarterly":
852
- delta = timedelta(days=365 / 4)
853
- elif self.frequency in ["threeTimesAYear", "semiannual", "annual"]:
854
- delta = timedelta(days=365)
855
- elif self.frequency == "biennial":
856
- delta = timedelta(days=365 * 2)
857
- elif self.frequency == "triennial":
858
- delta = timedelta(days=365 * 3)
859
- elif self.frequency == "quinquennial":
860
- delta = timedelta(days=365 * 5)
861
- if delta is None:
862
- return
863
- else:
864
- return self.last_update + delta
836
+ return self.frequency.next_update(self.last_update) if self.has_frequency else None
865
837
 
866
838
  @property
867
839
  def quality(self):
@@ -880,7 +852,7 @@ class Dataset(Auditable, WithMetrics, DatasetBadgeMixin, Owned, Linkable, db.Doc
880
852
  # Allow for being one day late on update.
881
853
  # We may have up to one day delay due to harvesting for example
882
854
  quality["update_fulfilled_in_time"] = (next_update - datetime.utcnow()).days >= -1
883
- elif self.frequency in ["continuous", "irregular", "punctual"]:
855
+ elif self.has_frequency and self.frequency.delta is None:
884
856
  # For these frequencies, we don't expect regular updates or can't quantify them.
885
857
  # Thus we consider the update_fulfilled_in_time quality criterion to be true.
886
858
  quality["update_fulfilled_in_time"] = True
@@ -905,7 +877,7 @@ class Dataset(Auditable, WithMetrics, DatasetBadgeMixin, Owned, Linkable, db.Doc
905
877
  result["temporal_coverage"] = True if self.temporal_coverage else False
906
878
  result["spatial"] = True if self.spatial else False
907
879
 
908
- result["update_frequency"] = self.frequency and self.frequency != "unknown"
880
+ result["update_frequency"] = self.has_frequency
909
881
 
910
882
  # We only save the next_update here because it is based on resources
911
883
  # We cannot save the `update_fulfilled_in_time` because it is time
udata/core/dataset/rdf.py CHANGED
@@ -52,54 +52,83 @@ from udata.rdf import (
52
52
  )
53
53
  from udata.utils import get_by, safe_unicode, to_naive_datetime
54
54
 
55
- from .constants import OGC_SERVICE_FORMATS, UPDATE_FREQUENCIES
55
+ from .constants import OGC_SERVICE_FORMATS, UpdateFrequency
56
56
  from .models import Checksum, Dataset, License, Resource
57
57
 
58
58
  log = logging.getLogger(__name__)
59
59
 
60
- # Map extra frequencies (ie. not defined in Dublin Core) to closest equivalent
61
- RDF_FREQUENCIES = {
62
- "punctual": None,
63
- "hourly": FREQ.continuous,
64
- "fourTimesADay": FREQ.daily,
65
- "threeTimesADay": FREQ.daily,
66
- "semidaily": FREQ.daily,
67
- "fourTimesAWeek": FREQ.threeTimesAWeek,
68
- "quinquennial": None,
69
- "unknown": None,
60
+ FREQ_TERM_TO_UDATA = {
61
+ FREQ.continuous: UpdateFrequency.CONTINUOUS,
62
+ FREQ.daily: UpdateFrequency.DAILY,
63
+ FREQ.threeTimesAWeek: UpdateFrequency.THREE_TIMES_A_WEEK,
64
+ FREQ.semiweekly: UpdateFrequency.SEMIWEEKLY,
65
+ FREQ.weekly: UpdateFrequency.WEEKLY,
66
+ FREQ.biweekly: UpdateFrequency.BIWEEKLY,
67
+ FREQ.threeTimesAMonth: UpdateFrequency.THREE_TIMES_A_MONTH,
68
+ FREQ.semimonthly: UpdateFrequency.SEMIMONTHLY,
69
+ FREQ.monthly: UpdateFrequency.MONTHLY,
70
+ FREQ.bimonthly: UpdateFrequency.BIMONTHLY,
71
+ FREQ.quarterly: UpdateFrequency.QUARTERLY,
72
+ FREQ.threeTimesAYear: UpdateFrequency.THREE_TIMES_A_YEAR,
73
+ FREQ.semiannual: UpdateFrequency.SEMIANNUAL,
74
+ FREQ.annual: UpdateFrequency.ANNUAL,
75
+ FREQ.biennial: UpdateFrequency.BIENNIAL,
76
+ FREQ.triennial: UpdateFrequency.TRIENNIAL,
77
+ FREQ.irregular: UpdateFrequency.IRREGULAR,
78
+ }
79
+ FREQ_ID_TO_UDATA = {
80
+ namespace_manager.compute_qname(k)[2].lower(): v for k, v in FREQ_TERM_TO_UDATA.items()
70
81
  }
71
82
 
72
- # Map european frequencies to their closest equivalent
73
- # See:
74
- # - http://publications.europa.eu/mdr/resource/authority/frequency/html/frequencies-eng.html # noqa: E501
75
- # - https://publications.europa.eu/en/web/eu-vocabularies/at-dataset/-/resource/dataset/frequency # noqa: E501
76
- EU_RDF_REQUENCIES = {
77
- # Match Dublin Core name
78
- EUFREQ.ANNUAL: "annual",
79
- EUFREQ.BIENNIAL: "biennial",
80
- EUFREQ.TRIENNIAL: "triennial",
81
- EUFREQ.QUARTERLY: "quarterly",
82
- EUFREQ.MONTHLY: "monthly",
83
- EUFREQ.BIMONTHLY: "bimonthly",
84
- EUFREQ.WEEKLY: "weekly",
85
- EUFREQ.BIWEEKLY: "biweekly",
86
- EUFREQ.DAILY: "daily",
87
- # Name differs from Dublin Core
88
- EUFREQ.ANNUAL_2: "semiannual",
89
- EUFREQ.ANNUAL_3: "threeTimesAYear",
90
- EUFREQ.MONTHLY_2: "semimonthly",
91
- EUFREQ.MONTHLY_3: "threeTimesAMonth",
92
- EUFREQ.WEEKLY_2: "semiweekly",
93
- EUFREQ.WEEKLY_3: "threeTimesAWeek",
94
- EUFREQ.DAILY_2: "semidaily",
95
- EUFREQ.CONT: "continuous",
96
- EUFREQ.UPDATE_CONT: "continuous",
97
- EUFREQ.IRREG: "irregular",
98
- EUFREQ.UNKNOWN: "unknown",
99
- EUFREQ.OTHER: "unknown",
100
- EUFREQ.NEVER: "punctual",
83
+ EUFREQ_TERM_TO_UDATA = {
84
+ EUFREQ.UNKNOWN: UpdateFrequency.UNKNOWN,
85
+ EUFREQ.UPDATE_CONT: UpdateFrequency.CONTINUOUS,
86
+ getattr(EUFREQ, "1MIN"): UpdateFrequency.ONE_MINUTE,
87
+ getattr(EUFREQ, "5MIN"): UpdateFrequency.FIVE_MINUTES,
88
+ getattr(EUFREQ, "10MIN"): UpdateFrequency.TEN_MINUTES,
89
+ getattr(EUFREQ, "15MIN"): UpdateFrequency.FIFTEEN_MINUTES,
90
+ getattr(EUFREQ, "30MIN"): UpdateFrequency.THIRTY_MINUTES,
91
+ EUFREQ.HOURLY: UpdateFrequency.HOURLY,
92
+ EUFREQ.BIHOURLY: UpdateFrequency.BIHOURLY,
93
+ EUFREQ.TRIHOURLY: UpdateFrequency.TRIHOURLY,
94
+ getattr(EUFREQ, "12HRS"): UpdateFrequency.TWELVE_HOURS,
95
+ EUFREQ.CONT: UpdateFrequency.SEVERAL_TIMES_A_DAY,
96
+ EUFREQ.DAILY_3: UpdateFrequency.THREE_TIMES_A_DAY,
97
+ EUFREQ.DAILY_2: UpdateFrequency.SEMIDAILY,
98
+ EUFREQ.DAILY: UpdateFrequency.DAILY,
99
+ EUFREQ.WEEKLY_5: UpdateFrequency.FIVE_TIMES_A_WEEK,
100
+ EUFREQ.WEEKLY_3: UpdateFrequency.THREE_TIMES_A_WEEK,
101
+ EUFREQ.WEEKLY_2: UpdateFrequency.SEMIWEEKLY,
102
+ EUFREQ.WEEKLY: UpdateFrequency.WEEKLY,
103
+ EUFREQ.BIWEEKLY: UpdateFrequency.BIWEEKLY,
104
+ EUFREQ.MONTHLY_3: UpdateFrequency.THREE_TIMES_A_MONTH,
105
+ EUFREQ.MONTHLY_2: UpdateFrequency.SEMIMONTHLY,
106
+ EUFREQ.MONTHLY: UpdateFrequency.MONTHLY,
107
+ EUFREQ.BIMONTHLY: UpdateFrequency.BIMONTHLY,
108
+ EUFREQ.QUARTERLY: UpdateFrequency.QUARTERLY,
109
+ EUFREQ.ANNUAL_3: UpdateFrequency.THREE_TIMES_A_YEAR,
110
+ EUFREQ.ANNUAL_2: UpdateFrequency.SEMIANNUAL,
111
+ EUFREQ.ANNUAL: UpdateFrequency.ANNUAL,
112
+ EUFREQ.BIENNIAL: UpdateFrequency.BIENNIAL,
113
+ EUFREQ.TRIENNIAL: UpdateFrequency.TRIENNIAL,
114
+ EUFREQ.QUADRENNIAL: UpdateFrequency.QUADRENNIAL,
115
+ EUFREQ.QUINQUENNIAL: UpdateFrequency.QUINQUENNIAL,
116
+ EUFREQ.DECENNIAL: UpdateFrequency.DECENNIAL,
117
+ EUFREQ.BIDECENNIAL: UpdateFrequency.BIDECENNIAL,
118
+ EUFREQ.TRIDECENNIAL: UpdateFrequency.TRIDECENNIAL,
119
+ EUFREQ.AS_NEEDED: UpdateFrequency.PUNCTUAL,
120
+ EUFREQ.IRREG: UpdateFrequency.IRREGULAR,
121
+ EUFREQ.NEVER: UpdateFrequency.NEVER,
122
+ EUFREQ.NOT_PLANNED: UpdateFrequency.NOT_PLANNED,
123
+ EUFREQ.OTHER: UpdateFrequency.OTHER,
124
+ }
125
+ EUFREQ_ID_TO_UDATA = {
126
+ namespace_manager.compute_qname(k)[2].lower(): v for k, v in EUFREQ_TERM_TO_UDATA.items()
101
127
  }
102
128
 
129
+ # Merge order matters: we want FREQ to win over EUFREQ
130
+ UDATA_FREQ_ID_TO_TERM = {v: k for k, v in {**EUFREQ_TERM_TO_UDATA, **FREQ_TERM_TO_UDATA}.items()}
131
+
103
132
 
104
133
  def temporal_to_rdf(daterange: db.DateRange, graph: Graph | None = None) -> RdfResource | None:
105
134
  if not daterange:
@@ -114,10 +143,8 @@ def temporal_to_rdf(daterange: db.DateRange, graph: Graph | None = None) -> RdfR
114
143
  return pot
115
144
 
116
145
 
117
- def frequency_to_rdf(frequency: str, graph: Graph | None = None) -> str | None:
118
- if not frequency:
119
- return
120
- return RDF_FREQUENCIES.get(frequency, getattr(FREQ, frequency))
146
+ def frequency_to_rdf(frequency: UpdateFrequency | None, graph: Graph | None = None) -> str | None:
147
+ return UDATA_FREQ_ID_TO_TERM.get(frequency)
121
148
 
122
149
 
123
150
  def owner_to_rdf(dataset: Dataset, graph: Graph | None = None) -> RdfResource | None:
@@ -333,8 +360,7 @@ def dataset_to_rdf(dataset: Dataset, graph: Graph | None = None) -> RdfResource:
333
360
  if dataset.temporal_coverage:
334
361
  d.set(DCT.temporal, temporal_to_rdf(dataset.temporal_coverage, graph))
335
362
 
336
- frequency = frequency_to_rdf(dataset.frequency)
337
- if frequency:
363
+ if frequency := frequency_to_rdf(dataset.frequency):
338
364
  d.set(DCT.accrualPeriodicity, frequency)
339
365
 
340
366
  owner_role = DCT.publisher
@@ -510,23 +536,19 @@ def spatial_from_rdf(graph):
510
536
  return None
511
537
 
512
538
 
513
- def frequency_from_rdf(term):
539
+ def frequency_from_rdf(term) -> UpdateFrequency | None:
514
540
  if isinstance(term, str):
515
541
  try:
516
542
  term = URIRef(uris.validate(term))
517
543
  except uris.ValidationError:
518
544
  pass
519
545
  if isinstance(term, Literal):
520
- if term.toPython().lower() in UPDATE_FREQUENCIES:
521
- return term.toPython().lower()
546
+ term = term.toPython().lower()
547
+ return FREQ_ID_TO_UDATA.get(term) or EUFREQ_ID_TO_UDATA.get(term)
522
548
  if isinstance(term, RdfResource):
523
549
  term = term.identifier
524
550
  if isinstance(term, URIRef):
525
- if EUFREQ in term:
526
- return EU_RDF_REQUENCIES.get(term)
527
- _, _, freq = namespace_manager.compute_qname(term)
528
- if freq.lower() in UPDATE_FREQUENCIES:
529
- return freq.lower()
551
+ return FREQ_TERM_TO_UDATA.get(term) or EUFREQ_TERM_TO_UDATA.get(term)
530
552
 
531
553
 
532
554
  def mime_from_rdf(resource):
@@ -16,7 +16,7 @@ from udata.i18n import lazy_gettext as _
16
16
  from udata.models import Activity, Discussion, Follow, Organization, TopicElement, Transfer, db
17
17
  from udata.tasks import job
18
18
 
19
- from .constants import UPDATE_FREQUENCIES
19
+ from .constants import UpdateFrequency
20
20
  from .models import Checksum, CommunityResource, Dataset, Resource
21
21
 
22
22
  log = get_task_logger(__name__)
@@ -77,12 +77,7 @@ def purge_datasets(self):
77
77
 
78
78
  @job("send-frequency-reminder")
79
79
  def send_frequency_reminder(self):
80
- # We exclude irrelevant frequencies.
81
- frequencies = [
82
- f
83
- for f in UPDATE_FREQUENCIES.keys()
84
- if f not in ("unknown", "realtime", "punctual", "irregular", "continuous")
85
- ]
80
+ bounded_frequencies = [f.id for f in UpdateFrequency if f.delta is not None]
86
81
  now = datetime.utcnow()
87
82
  reminded_orgs = {}
88
83
  reminded_people = []
@@ -90,11 +85,11 @@ def send_frequency_reminder(self):
90
85
  for org in Organization.objects.visible():
91
86
  outdated_datasets = []
92
87
  for dataset in Dataset.objects.filter(
93
- frequency__in=frequencies, organization=org
88
+ frequency__in=bounded_frequencies, organization=org
94
89
  ).visible():
95
90
  if dataset.next_update + timedelta(days=allowed_delay) < now:
96
91
  dataset.outdated = now - dataset.next_update
97
- dataset.frequency_str = UPDATE_FREQUENCIES[dataset.frequency]
92
+ dataset.frequency_str = dataset.frequency.label
98
93
  outdated_datasets.append(dataset)
99
94
  if outdated_datasets:
100
95
  reminded_orgs[org] = outdated_datasets