udata 11.1.2.dev7__py3-none-any.whl → 11.1.2.dev11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of udata might be problematic. Click here for more details.

Files changed (53) hide show
  1. udata/api/oauth2.py +22 -3
  2. udata/app.py +3 -0
  3. udata/auth/__init__.py +11 -0
  4. udata/auth/forms.py +70 -3
  5. udata/auth/mails.py +6 -0
  6. udata/auth/proconnect.py +127 -0
  7. udata/auth/views.py +57 -2
  8. udata/commands/db.py +2 -3
  9. udata/core/__init__.py +2 -0
  10. udata/core/captchetat.py +80 -0
  11. udata/core/dataservices/api.py +1 -2
  12. udata/core/dataset/api.py +3 -4
  13. udata/core/dataset/api_fields.py +3 -4
  14. udata/core/dataset/apiv2.py +6 -6
  15. udata/core/dataset/commands.py +0 -10
  16. udata/core/dataset/constants.py +124 -38
  17. udata/core/dataset/factories.py +2 -1
  18. udata/core/dataset/forms.py +14 -10
  19. udata/core/dataset/models.py +8 -36
  20. udata/core/dataset/preview.py +3 -3
  21. udata/core/dataset/rdf.py +84 -65
  22. udata/core/dataset/tasks.py +2 -50
  23. udata/core/metrics/helpers.py +6 -7
  24. udata/core/metrics/tasks.py +3 -6
  25. udata/core/post/api.py +1 -2
  26. udata/core/reuse/api.py +1 -2
  27. udata/core/user/api.py +1 -3
  28. udata/cors.py +19 -2
  29. udata/harvest/backends/ckan/harvesters.py +10 -14
  30. udata/harvest/backends/maaf.py +15 -14
  31. udata/harvest/tests/ckan/test_ckan_backend.py +4 -3
  32. udata/harvest/tests/test_dcat_backend.py +3 -2
  33. udata/i18n.py +7 -32
  34. udata/migrations/2025-01-05-dataservices-fields-changes.py +1 -2
  35. udata/migrations/2025-09-04-update-legacy-frequencies.py +36 -0
  36. udata/settings.py +27 -0
  37. udata/templates/security/email/reset_instructions.html +1 -1
  38. udata/templates/security/email/reset_instructions.txt +1 -1
  39. udata/tests/api/test_datasets_api.py +41 -12
  40. udata/tests/dataset/test_dataset_model.py +17 -53
  41. udata/tests/dataset/test_dataset_rdf.py +27 -28
  42. udata/translations/udata.pot +226 -150
  43. udata/uris.py +1 -2
  44. udata/utils.py +8 -1
  45. {udata-11.1.2.dev7.dist-info → udata-11.1.2.dev11.dist-info}/METADATA +3 -4
  46. {udata-11.1.2.dev7.dist-info → udata-11.1.2.dev11.dist-info}/RECORD +50 -50
  47. udata/templates/mail/frequency_reminder.html +0 -34
  48. udata/templates/mail/frequency_reminder.txt +0 -18
  49. udata/tests/test_i18n.py +0 -93
  50. {udata-11.1.2.dev7.dist-info → udata-11.1.2.dev11.dist-info}/WHEEL +0 -0
  51. {udata-11.1.2.dev7.dist-info → udata-11.1.2.dev11.dist-info}/entry_points.txt +0 -0
  52. {udata-11.1.2.dev7.dist-info → udata-11.1.2.dev11.dist-info}/licenses/LICENSE +0 -0
  53. {udata-11.1.2.dev7.dist-info → udata-11.1.2.dev11.dist-info}/top_level.txt +0 -0
@@ -30,7 +30,7 @@ from .api_fields import (
30
30
  temporal_coverage_fields,
31
31
  user_ref_fields,
32
32
  )
33
- from .constants import DEFAULT_FREQUENCY, DEFAULT_LICENSE, FULL_OBJECTS_HEADER, UPDATE_FREQUENCIES
33
+ from .constants import DEFAULT_LICENSE, FULL_OBJECTS_HEADER, UpdateFrequency
34
34
  from .models import CommunityResource, Dataset
35
35
  from .search import DatasetSearch
36
36
 
@@ -157,13 +157,13 @@ dataset_fields = apiv2.model(
157
157
  ),
158
158
  "frequency": fields.Raw(
159
159
  attribute=lambda d: {
160
- "id": d.frequency or DEFAULT_FREQUENCY,
161
- "label": UPDATE_FREQUENCIES.get(d.frequency or DEFAULT_FREQUENCY),
160
+ "id": (d.frequency or UpdateFrequency.UNKNOWN).id,
161
+ "label": (d.frequency or UpdateFrequency.UNKNOWN).label,
162
162
  }
163
163
  if request.headers.get(FULL_OBJECTS_HEADER, False, bool)
164
- else d.frequency,
165
- enum=list(UPDATE_FREQUENCIES),
166
- default=DEFAULT_FREQUENCY,
164
+ else (d.frequency or UpdateFrequency.UNKNOWN),
165
+ enum=list(UpdateFrequency),
166
+ default=UpdateFrequency.UNKNOWN,
167
167
  required=True,
168
168
  description="The update frequency (full Frequency object if `X-Get-Datasets-Full-Objects` is set, ID of the frequency otherwise)",
169
169
  ),
@@ -10,7 +10,6 @@ from udata.core.dataset.constants import DEFAULT_LICENSE
10
10
  from udata.models import Dataset, License
11
11
 
12
12
  from . import actions
13
- from .tasks import send_frequency_reminder
14
13
 
15
14
  log = logging.getLogger(__name__)
16
15
 
@@ -66,15 +65,6 @@ def licenses(source=DEFAULT_LICENSE_FILE):
66
65
  success("Done")
67
66
 
68
67
 
69
- @cli.command()
70
- def frequency_reminder():
71
- """Send a unique email per organization to members
72
-
73
- to remind them they have outdated datasets on the website.
74
- """
75
- send_frequency_reminder()
76
-
77
-
78
68
  @cli.group("dataset")
79
69
  def grp():
80
70
  """Dataset related operations"""
@@ -1,48 +1,134 @@
1
1
  from collections import OrderedDict
2
+ from datetime import datetime, timedelta
3
+ from enum import StrEnum, auto
4
+
5
+ from flask_babel import LazyString
2
6
 
3
7
  from udata.i18n import lazy_gettext as _
4
8
 
5
- #: Udata frequencies with their labels
6
- #:
7
- #: See: http://dublincore.org/groups/collections/frequency/
8
- UPDATE_FREQUENCIES = OrderedDict(
9
- [ # Dublin core equivalent
10
- ("unknown", _("Unknown")), # N/A
11
- ("punctual", _("Punctual")), # N/A
12
- ("continuous", _("Real time")), # freq:continuous
13
- ("hourly", _("Hourly")), # N/A
14
- ("fourTimesADay", _("Four times a day")), # N/A
15
- ("threeTimesADay", _("Three times a day")), # N/A
16
- ("semidaily", _("Semidaily")), # N/A
17
- ("daily", _("Daily")), # freq:daily
18
- ("fourTimesAWeek", _("Four times a week")), # N/A
19
- ("threeTimesAWeek", _("Three times a week")), # freq:threeTimesAWeek
20
- ("semiweekly", _("Semiweekly")), # freq:semiweekly
21
- ("weekly", _("Weekly")), # freq:weekly
22
- ("biweekly", _("Biweekly")), # freq:bimonthly
23
- ("threeTimesAMonth", _("Three times a month")), # freq:threeTimesAMonth
24
- ("semimonthly", _("Semimonthly")), # freq:semimonthly
25
- ("monthly", _("Monthly")), # freq:monthly
26
- ("bimonthly", _("Bimonthly")), # freq:bimonthly
27
- ("quarterly", _("Quarterly")), # freq:quarterly
28
- ("threeTimesAYear", _("Three times a year")), # freq:threeTimesAYear
29
- ("semiannual", _("Biannual")), # freq:semiannual
30
- ("annual", _("Annual")), # freq:annual
31
- ("biennial", _("Biennial")), # freq:biennial
32
- ("triennial", _("Triennial")), # freq:triennial
33
- ("quinquennial", _("Quinquennial")), # N/A
34
- ("irregular", _("Irregular")), # freq:irregular
35
- ]
36
- )
37
9
 
38
- #: Map legacy frequencies to currents
39
- LEGACY_FREQUENCIES = {
40
- "fortnighly": "biweekly",
41
- "biannual": "semiannual",
42
- "realtime": "continuous",
10
+ class UpdateFrequency(StrEnum):
11
+ """
12
+ Udata frequency vocabulary
13
+
14
+ Based on the following vocabularies:
15
+ - DC: http://dublincore.org/groups/collections/frequency/
16
+ - EU: https://publications.europa.eu/en/web/eu-vocabularies/at-dataset/-/resource/dataset/frequency
17
+ """
18
+
19
+ CONTINUOUS = auto(), _("Real time"), None # DC, EU:UPDATE_CONT
20
+ ONE_MINUTE = "oneMinute", _("Every minute"), timedelta(minutes=1) # EU:1MIN
21
+ FIVE_MINUTES = "fiveMinutes", _("Every five minutes"), timedelta(minutes=5) # EU:5MIN
22
+ TEN_MINUTES = "tenMinutes", _("Every ten minutes"), timedelta(minutes=10) # EU:10MIN
23
+ FIFTEEN_MINUTES = (
24
+ "fifteenMinutes",
25
+ _("Every fifteen minutes"),
26
+ timedelta(minutes=15),
27
+ ) # EU:15MIN
28
+ THIRTY_MINUTES = "thirtyMinutes", _("Every thirty minute"), timedelta(minutes=30) # EU:30MIN
29
+ HOURLY = auto(), _("Every hour"), timedelta(hours=1) # EU
30
+ BIHOURLY = auto(), _("Every two hours"), timedelta(hours=2) # EU
31
+ TRIHOURLY = auto(), _("Every three hours"), timedelta(hours=3) # EU
32
+ TWELVE_HOURS = "twelveHours", _("Every twelve hours"), timedelta(hours=12) # EU:12HRS
33
+ SEVERAL_TIMES_A_DAY = "severalTimesADay", _("Several times a day"), timedelta(days=1) # EU:CONT
34
+ THREE_TIMES_A_DAY = "threeTimesADay", _("Three times a day"), timedelta(days=1) # EU:DAILY_3
35
+ SEMIDAILY = auto(), _("Twice a day"), timedelta(days=1) # EU:DAILY_2
36
+ DAILY = auto(), _("Daily"), timedelta(days=1) # DC, EU
37
+ FIVE_TIMES_A_WEEK = "fiveTimesAWeek", _("Five times a week"), timedelta(weeks=1) # EU:WEEKLY_5
38
+ THREE_TIMES_A_WEEK = (
39
+ "threeTimesAWeek",
40
+ _("Three times a week"),
41
+ timedelta(weeks=1),
42
+ ) # DC, EU:WEEKLY_3
43
+ SEMIWEEKLY = auto(), _("Twice a week"), timedelta(weeks=1) # DC, EU:WEEKLY_2
44
+ WEEKLY = auto(), _("Weekly"), timedelta(weeks=1) # DC, EU
45
+ BIWEEKLY = auto(), _("Every two weeks"), timedelta(weeks=2) # DC, EU
46
+ THREE_TIMES_A_MONTH = (
47
+ "threeTimesAMonth",
48
+ _("Three times a month"),
49
+ timedelta(days=31),
50
+ ) # DC, EU:MONTHLY_3
51
+ SEMIMONTHLY = auto(), _("Twice a month"), timedelta(days=31) # DC, EU:MONTHLY_2
52
+ MONTHLY = auto(), _("Monthly"), timedelta(days=31) # DC, EU
53
+ BIMONTHLY = auto(), _("Every two months"), timedelta(days=31 * 2) # DC, EU
54
+ QUARTERLY = auto(), _("Quarterly"), timedelta(days=31 * 3) # DC, EU
55
+ THREE_TIMES_A_YEAR = (
56
+ "threeTimesAYear",
57
+ _("Three times a year"),
58
+ timedelta(days=365),
59
+ ) # DC, EU:ANNUAL_3
60
+ SEMIANNUAL = auto(), _("Twice a year"), timedelta(days=365) # DC, EU:ANNUAL_2
61
+ ANNUAL = auto(), _("Annually"), timedelta(days=365) # DC, EU
62
+ BIENNIAL = auto(), _("Every two years"), timedelta(days=365 * 2) # DC, EU
63
+ TRIENNIAL = auto(), _("Every three years"), timedelta(days=365 * 3) # DC, EU
64
+ QUADRENNIAL = auto(), _("Every four years"), timedelta(days=365 * 4) # EU
65
+ QUINQUENNIAL = auto(), _("Every five years"), timedelta(days=365 * 5) # EU
66
+ DECENNIAL = auto(), _("Every ten years"), timedelta(days=365 * 10) # EU
67
+ BIDECENNIAL = auto(), _("Every twenty years"), timedelta(days=365 * 20) # EU
68
+ TRIDECENNIAL = auto(), _("Every thirty years"), timedelta(days=365 * 30) # EU
69
+ PUNCTUAL = auto(), _("Punctual"), None # EU:AS_NEEDED
70
+ IRREGULAR = auto(), _("Irregular"), None # DC, EU:IRREG
71
+ NEVER = auto(), _("Never"), None # EU
72
+ NOT_PLANNED = "notPlanned", _("Not planned"), None # EU:NOT_PLANNED
73
+ OTHER = auto(), _("Other"), None # EU
74
+ UNKNOWN = auto(), _("Unknown"), None # EU
75
+
76
+ def __new__(cls, id: str, label: LazyString, delta: timedelta | None):
77
+ # Set _value_ so the enum value-based lookup depends only on the id field.
78
+ # See https://docs.python.org/3/howto/enum.html#when-to-use-new-vs-init
79
+ obj = str.__new__(cls, id)
80
+ obj._value_ = id
81
+ obj._label = label # type: ignore[misc]
82
+ obj._delta = delta # type: ignore[misc]
83
+ return obj
84
+
85
+ @classmethod
86
+ def _missing_(cls, value) -> "UpdateFrequency | None":
87
+ if isinstance(value, str):
88
+ return UpdateFrequency._LEGACY_FREQUENCIES.get(value) # type: ignore[misc]
89
+
90
+ @property
91
+ def id(self) -> str:
92
+ return self.value
93
+
94
+ @property
95
+ def label(self) -> LazyString:
96
+ return self._label # type: ignore[misc]
97
+
98
+ @property
99
+ def delta(self) -> timedelta | None:
100
+ return self._delta # type: ignore[misc]
101
+
102
+ def next_update(self, last_update: datetime) -> datetime | None:
103
+ return last_update + self.delta if self.delta else None
104
+
105
+
106
+ # We must declare UpdateFrequency class variables after the Enum magic
107
+ # happens, so outside of class declaration.
108
+ #
109
+ # The alternative method based on _ignore_ breaks accessing the class
110
+ # variables from outside the class, because accesses to them will go
111
+ # through __getattr__ as if it were an Enum entry.
112
+ #
113
+ # FIXME(python 3.13+): Use Enum._add_value_alias_ instead:
114
+ #
115
+ # UNKNOWN = auto(), _("Unknown"), None, []
116
+ # CONTINUOUS = auto(), _("Real time"), None, ["realtime"]
117
+ # SEVERAL_TIMES_A_DAY = "severalTimesADay", ..., ["fourTimesADay"]
118
+ #
119
+ # def __new__(cls, id: str, ..., aliases: list[str]):
120
+ # ...
121
+ # for alias in aliases:
122
+ # obj._add_value_alias_(alias)
123
+ #
124
+ UpdateFrequency._LEGACY_FREQUENCIES = { # type: ignore[misc]
125
+ "realtime": UpdateFrequency.CONTINUOUS,
126
+ "fourTimesADay": UpdateFrequency.SEVERAL_TIMES_A_DAY,
127
+ "fourTimesAWeek": UpdateFrequency.OTHER,
128
+ "fortnighly": UpdateFrequency.BIWEEKLY,
129
+ "biannual": UpdateFrequency.SEMIANNUAL,
43
130
  }
44
131
 
45
- DEFAULT_FREQUENCY = "unknown"
46
132
 
47
133
  DEFAULT_LICENSE = {
48
134
  "id": "notspecified",
@@ -8,6 +8,7 @@ from udata.core.organization.factories import OrganizationFactory
8
8
  from udata.core.spatial.factories import SpatialCoverageFactory
9
9
  from udata.factories import ModelFactory
10
10
 
11
+ from .constants import UpdateFrequency
11
12
  from .models import Checksum, CommunityResource, Dataset, License, Resource
12
13
 
13
14
 
@@ -17,7 +18,7 @@ class DatasetFactory(ModelFactory):
17
18
 
18
19
  title = factory.Faker("sentence")
19
20
  description = factory.Faker("text")
20
- frequency = "unknown"
21
+ frequency = UpdateFrequency.UNKNOWN
21
22
  resources = factory.LazyAttribute(lambda o: ResourceFactory.build_batch(o.nb_resources))
22
23
 
23
24
  class Params:
@@ -6,14 +6,12 @@ from udata.mongo.errors import FieldValidationError
6
6
 
7
7
  from .constants import (
8
8
  CHECKSUM_TYPES,
9
- DEFAULT_FREQUENCY,
10
9
  DESCRIPTION_SHORT_SIZE_LIMIT,
11
10
  DESCRIPTION_SIZE_LIMIT,
12
- LEGACY_FREQUENCIES,
13
11
  RESOURCE_FILETYPES,
14
12
  RESOURCE_TYPES,
15
13
  TITLE_SIZE_LIMIT,
16
- UPDATE_FREQUENCIES,
14
+ UpdateFrequency,
17
15
  )
18
16
  from .models import (
19
17
  Checksum,
@@ -117,10 +115,11 @@ class CommunityResourceForm(BaseResourceForm):
117
115
  organization = fields.PublishAsField(_("Publish as"))
118
116
 
119
117
 
120
- def map_legacy_frequencies(form, field):
121
- """Map legacy frequencies to new ones"""
122
- if field.data in LEGACY_FREQUENCIES:
123
- field.data = LEGACY_FREQUENCIES[field.data]
118
+ def unmarshal_frequency(form, field):
119
+ # We don't need to worry about invalid field.data being fed to UpdateFrequency here,
120
+ # since the API will already have ensured incoming data matches the field definition,
121
+ # which in our case is an enum of valid UpdateFrequency values.
122
+ field.data = UpdateFrequency(field.data)
124
123
 
125
124
 
126
125
  def validate_contact_point(form, field):
@@ -160,10 +159,15 @@ class DatasetForm(ModelForm):
160
159
  license = fields.ModelSelectField(_("License"), model=License, allow_blank=True)
161
160
  frequency = fields.SelectField(
162
161
  _("Update frequency"),
163
- choices=list(UPDATE_FREQUENCIES.items()),
164
- default=DEFAULT_FREQUENCY,
162
+ choices=list(UpdateFrequency),
163
+ default=UpdateFrequency.UNKNOWN,
165
164
  validators=[validators.optional()],
166
- preprocessors=[map_legacy_frequencies],
165
+ # Unmarshaling should not happen during validation, but flask-restx makes it cumbersome
166
+ # to do it earlier, requiring a request parser (unmarshaler) separate from the marshaler,
167
+ # meaning we can't use the same object for @api.expect and @api.marshal_with.
168
+ # This should get better once flask-restx moves to something like marshmallow, which
169
+ # handles marshaling/unmarshaling more symmetrically and in the same object.
170
+ preprocessors=[unmarshal_frequency],
167
171
  description=_("The frequency at which data are updated."),
168
172
  )
169
173
  frequency_date = fields.DateTimeField(_("Expected frequency date"))
@@ -38,7 +38,6 @@ from .constants import (
38
38
  DESCRIPTION_SHORT_SIZE_LIMIT,
39
39
  HVD,
40
40
  INSPIRE,
41
- LEGACY_FREQUENCIES,
42
41
  MAX_DISTANCE,
43
42
  PIVOTAL_DATA,
44
43
  RESOURCE_FILETYPES,
@@ -47,7 +46,7 @@ from .constants import (
47
46
  SL,
48
47
  SPD,
49
48
  SR,
50
- UPDATE_FREQUENCIES,
49
+ UpdateFrequency,
51
50
  )
52
51
  from .exceptions import (
53
52
  SchemasCacheUnavailableException,
@@ -580,7 +579,8 @@ class Dataset(Auditable, WithMetrics, DatasetBadgeMixin, Owned, Linkable, db.Doc
580
579
  resources = field(db.ListField(db.EmbeddedDocumentField(Resource)), auditable=False)
581
580
 
582
581
  private = field(db.BooleanField(default=False))
583
- frequency = field(db.StringField(choices=list(UPDATE_FREQUENCIES.keys())))
582
+
583
+ frequency = field(db.EnumField(UpdateFrequency))
584
584
  frequency_date = field(db.DateTimeField(verbose_name=_("Future date of update")))
585
585
  temporal_coverage = field(db.EmbeddedDocumentField(db.DateRange))
586
586
  spatial = field(db.EmbeddedDocumentField(SpatialCoverage))
@@ -703,8 +703,6 @@ class Dataset(Auditable, WithMetrics, DatasetBadgeMixin, Owned, Linkable, db.Doc
703
703
 
704
704
  def clean(self):
705
705
  super(Dataset, self).clean()
706
- if self.frequency in LEGACY_FREQUENCIES:
707
- self.frequency = LEGACY_FREQUENCIES[self.frequency]
708
706
 
709
707
  if len(set(res.id for res in self.resources)) != len(self.resources):
710
708
  raise MongoEngineValidationError(f"Duplicate resource ID in dataset #{self.id}.")
@@ -782,8 +780,8 @@ class Dataset(Auditable, WithMetrics, DatasetBadgeMixin, Owned, Linkable, db.Doc
782
780
  return self.owner.avatar.url
783
781
 
784
782
  @property
785
- def frequency_label(self):
786
- return UPDATE_FREQUENCIES.get(self.frequency or "unknown", UPDATE_FREQUENCIES["unknown"])
783
+ def has_frequency(self):
784
+ return self.frequency not in [None, UpdateFrequency.UNKNOWN]
787
785
 
788
786
  def check_availability(self):
789
787
  """Check if resources from that dataset are available.
@@ -835,33 +833,7 @@ class Dataset(Auditable, WithMetrics, DatasetBadgeMixin, Owned, Linkable, db.Doc
835
833
  Ex: the next update for a threeTimesAday freq is not
836
834
  every 8 hours, but is maximum 24 hours later.
837
835
  """
838
- delta = None
839
- if self.frequency == "hourly":
840
- delta = timedelta(hours=1)
841
- elif self.frequency in ["fourTimesADay", "threeTimesADay", "semidaily", "daily"]:
842
- delta = timedelta(days=1)
843
- elif self.frequency in ["fourTimesAWeek", "threeTimesAWeek", "semiweekly", "weekly"]:
844
- delta = timedelta(weeks=1)
845
- elif self.frequency == "biweekly":
846
- delta = timedelta(weeks=2)
847
- elif self.frequency in ["threeTimesAMonth", "semimonthly", "monthly"]:
848
- delta = timedelta(days=31)
849
- elif self.frequency == "bimonthly":
850
- delta = timedelta(days=31 * 2)
851
- elif self.frequency == "quarterly":
852
- delta = timedelta(days=365 / 4)
853
- elif self.frequency in ["threeTimesAYear", "semiannual", "annual"]:
854
- delta = timedelta(days=365)
855
- elif self.frequency == "biennial":
856
- delta = timedelta(days=365 * 2)
857
- elif self.frequency == "triennial":
858
- delta = timedelta(days=365 * 3)
859
- elif self.frequency == "quinquennial":
860
- delta = timedelta(days=365 * 5)
861
- if delta is None:
862
- return
863
- else:
864
- return self.last_update + delta
836
+ return self.frequency.next_update(self.last_update) if self.has_frequency else None
865
837
 
866
838
  @property
867
839
  def quality(self):
@@ -880,7 +852,7 @@ class Dataset(Auditable, WithMetrics, DatasetBadgeMixin, Owned, Linkable, db.Doc
880
852
  # Allow for being one day late on update.
881
853
  # We may have up to one day delay due to harvesting for example
882
854
  quality["update_fulfilled_in_time"] = (next_update - datetime.utcnow()).days >= -1
883
- elif self.frequency in ["continuous", "irregular", "punctual"]:
855
+ elif self.has_frequency and self.frequency.delta is None:
884
856
  # For these frequencies, we don't expect regular updates or can't quantify them.
885
857
  # Thus we consider the update_fulfilled_in_time quality criterion to be true.
886
858
  quality["update_fulfilled_in_time"] = True
@@ -905,7 +877,7 @@ class Dataset(Auditable, WithMetrics, DatasetBadgeMixin, Owned, Linkable, db.Doc
905
877
  result["temporal_coverage"] = True if self.temporal_coverage else False
906
878
  result["spatial"] = True if self.spatial else False
907
879
 
908
- result["update_frequency"] = self.frequency and self.frequency != "unknown"
880
+ result["update_frequency"] = self.has_frequency
909
881
 
910
882
  # We only save the next_update here because it is based on resources
911
883
  # We cannot save the `update_fulfilled_in_time` because it is time
@@ -1,7 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  from abc import ABC, abstractmethod
4
- from typing import TYPE_CHECKING, Optional
4
+ from typing import TYPE_CHECKING
5
5
 
6
6
  from flask import current_app
7
7
 
@@ -12,12 +12,12 @@ if TYPE_CHECKING:
12
12
  # Define an abstract class
13
13
  class Preview(ABC):
14
14
  @abstractmethod
15
- def preview_url(self, resource: Resource) -> Optional[str]:
15
+ def preview_url(self, resource: Resource) -> str | None:
16
16
  return None
17
17
 
18
18
 
19
19
  class TabularAPIPreview(Preview):
20
- def preview_url(self, resource: Resource) -> Optional[str]:
20
+ def preview_url(self, resource: Resource) -> str | None:
21
21
  preview_base_url = current_app.config["TABULAR_EXPLORE_URL"]
22
22
  if not preview_base_url:
23
23
  return None
udata/core/dataset/rdf.py CHANGED
@@ -6,7 +6,6 @@ import calendar
6
6
  import json
7
7
  import logging
8
8
  from datetime import date, datetime
9
- from typing import Optional
10
9
 
11
10
  from dateutil.parser import parse as parse_dt
12
11
  from flask import current_app
@@ -53,58 +52,85 @@ from udata.rdf import (
53
52
  )
54
53
  from udata.utils import get_by, safe_unicode, to_naive_datetime
55
54
 
56
- from .constants import OGC_SERVICE_FORMATS, UPDATE_FREQUENCIES
55
+ from .constants import OGC_SERVICE_FORMATS, UpdateFrequency
57
56
  from .models import Checksum, Dataset, License, Resource
58
57
 
59
58
  log = logging.getLogger(__name__)
60
59
 
61
- # Map extra frequencies (ie. not defined in Dublin Core) to closest equivalent
62
- RDF_FREQUENCIES = {
63
- "punctual": None,
64
- "hourly": FREQ.continuous,
65
- "fourTimesADay": FREQ.daily,
66
- "threeTimesADay": FREQ.daily,
67
- "semidaily": FREQ.daily,
68
- "fourTimesAWeek": FREQ.threeTimesAWeek,
69
- "quinquennial": None,
70
- "unknown": None,
60
+ FREQ_TERM_TO_UDATA = {
61
+ FREQ.continuous: UpdateFrequency.CONTINUOUS,
62
+ FREQ.daily: UpdateFrequency.DAILY,
63
+ FREQ.threeTimesAWeek: UpdateFrequency.THREE_TIMES_A_WEEK,
64
+ FREQ.semiweekly: UpdateFrequency.SEMIWEEKLY,
65
+ FREQ.weekly: UpdateFrequency.WEEKLY,
66
+ FREQ.biweekly: UpdateFrequency.BIWEEKLY,
67
+ FREQ.threeTimesAMonth: UpdateFrequency.THREE_TIMES_A_MONTH,
68
+ FREQ.semimonthly: UpdateFrequency.SEMIMONTHLY,
69
+ FREQ.monthly: UpdateFrequency.MONTHLY,
70
+ FREQ.bimonthly: UpdateFrequency.BIMONTHLY,
71
+ FREQ.quarterly: UpdateFrequency.QUARTERLY,
72
+ FREQ.threeTimesAYear: UpdateFrequency.THREE_TIMES_A_YEAR,
73
+ FREQ.semiannual: UpdateFrequency.SEMIANNUAL,
74
+ FREQ.annual: UpdateFrequency.ANNUAL,
75
+ FREQ.biennial: UpdateFrequency.BIENNIAL,
76
+ FREQ.triennial: UpdateFrequency.TRIENNIAL,
77
+ FREQ.irregular: UpdateFrequency.IRREGULAR,
78
+ }
79
+ FREQ_ID_TO_UDATA = {
80
+ namespace_manager.compute_qname(k)[2].lower(): v for k, v in FREQ_TERM_TO_UDATA.items()
71
81
  }
72
82
 
73
- # Map european frequencies to their closest equivalent
74
- # See:
75
- # - http://publications.europa.eu/mdr/resource/authority/frequency/html/frequencies-eng.html # noqa: E501
76
- # - https://publications.europa.eu/en/web/eu-vocabularies/at-dataset/-/resource/dataset/frequency # noqa: E501
77
- EU_RDF_REQUENCIES = {
78
- # Match Dublin Core name
79
- EUFREQ.ANNUAL: "annual",
80
- EUFREQ.BIENNIAL: "biennial",
81
- EUFREQ.TRIENNIAL: "triennial",
82
- EUFREQ.QUARTERLY: "quarterly",
83
- EUFREQ.MONTHLY: "monthly",
84
- EUFREQ.BIMONTHLY: "bimonthly",
85
- EUFREQ.WEEKLY: "weekly",
86
- EUFREQ.BIWEEKLY: "biweekly",
87
- EUFREQ.DAILY: "daily",
88
- # Name differs from Dublin Core
89
- EUFREQ.ANNUAL_2: "semiannual",
90
- EUFREQ.ANNUAL_3: "threeTimesAYear",
91
- EUFREQ.MONTHLY_2: "semimonthly",
92
- EUFREQ.MONTHLY_3: "threeTimesAMonth",
93
- EUFREQ.WEEKLY_2: "semiweekly",
94
- EUFREQ.WEEKLY_3: "threeTimesAWeek",
95
- EUFREQ.DAILY_2: "semidaily",
96
- EUFREQ.CONT: "continuous",
97
- EUFREQ.UPDATE_CONT: "continuous",
98
- EUFREQ.IRREG: "irregular",
99
- EUFREQ.UNKNOWN: "unknown",
100
- EUFREQ.OTHER: "unknown",
101
- EUFREQ.NEVER: "punctual",
83
+ EUFREQ_TERM_TO_UDATA = {
84
+ EUFREQ.UNKNOWN: UpdateFrequency.UNKNOWN,
85
+ EUFREQ.UPDATE_CONT: UpdateFrequency.CONTINUOUS,
86
+ getattr(EUFREQ, "1MIN"): UpdateFrequency.ONE_MINUTE,
87
+ getattr(EUFREQ, "5MIN"): UpdateFrequency.FIVE_MINUTES,
88
+ getattr(EUFREQ, "10MIN"): UpdateFrequency.TEN_MINUTES,
89
+ getattr(EUFREQ, "15MIN"): UpdateFrequency.FIFTEEN_MINUTES,
90
+ getattr(EUFREQ, "30MIN"): UpdateFrequency.THIRTY_MINUTES,
91
+ EUFREQ.HOURLY: UpdateFrequency.HOURLY,
92
+ EUFREQ.BIHOURLY: UpdateFrequency.BIHOURLY,
93
+ EUFREQ.TRIHOURLY: UpdateFrequency.TRIHOURLY,
94
+ getattr(EUFREQ, "12HRS"): UpdateFrequency.TWELVE_HOURS,
95
+ EUFREQ.CONT: UpdateFrequency.SEVERAL_TIMES_A_DAY,
96
+ EUFREQ.DAILY_3: UpdateFrequency.THREE_TIMES_A_DAY,
97
+ EUFREQ.DAILY_2: UpdateFrequency.SEMIDAILY,
98
+ EUFREQ.DAILY: UpdateFrequency.DAILY,
99
+ EUFREQ.WEEKLY_5: UpdateFrequency.FIVE_TIMES_A_WEEK,
100
+ EUFREQ.WEEKLY_3: UpdateFrequency.THREE_TIMES_A_WEEK,
101
+ EUFREQ.WEEKLY_2: UpdateFrequency.SEMIWEEKLY,
102
+ EUFREQ.WEEKLY: UpdateFrequency.WEEKLY,
103
+ EUFREQ.BIWEEKLY: UpdateFrequency.BIWEEKLY,
104
+ EUFREQ.MONTHLY_3: UpdateFrequency.THREE_TIMES_A_MONTH,
105
+ EUFREQ.MONTHLY_2: UpdateFrequency.SEMIMONTHLY,
106
+ EUFREQ.MONTHLY: UpdateFrequency.MONTHLY,
107
+ EUFREQ.BIMONTHLY: UpdateFrequency.BIMONTHLY,
108
+ EUFREQ.QUARTERLY: UpdateFrequency.QUARTERLY,
109
+ EUFREQ.ANNUAL_3: UpdateFrequency.THREE_TIMES_A_YEAR,
110
+ EUFREQ.ANNUAL_2: UpdateFrequency.SEMIANNUAL,
111
+ EUFREQ.ANNUAL: UpdateFrequency.ANNUAL,
112
+ EUFREQ.BIENNIAL: UpdateFrequency.BIENNIAL,
113
+ EUFREQ.TRIENNIAL: UpdateFrequency.TRIENNIAL,
114
+ EUFREQ.QUADRENNIAL: UpdateFrequency.QUADRENNIAL,
115
+ EUFREQ.QUINQUENNIAL: UpdateFrequency.QUINQUENNIAL,
116
+ EUFREQ.DECENNIAL: UpdateFrequency.DECENNIAL,
117
+ EUFREQ.BIDECENNIAL: UpdateFrequency.BIDECENNIAL,
118
+ EUFREQ.TRIDECENNIAL: UpdateFrequency.TRIDECENNIAL,
119
+ EUFREQ.AS_NEEDED: UpdateFrequency.PUNCTUAL,
120
+ EUFREQ.IRREG: UpdateFrequency.IRREGULAR,
121
+ EUFREQ.NEVER: UpdateFrequency.NEVER,
122
+ EUFREQ.NOT_PLANNED: UpdateFrequency.NOT_PLANNED,
123
+ EUFREQ.OTHER: UpdateFrequency.OTHER,
124
+ }
125
+ EUFREQ_ID_TO_UDATA = {
126
+ namespace_manager.compute_qname(k)[2].lower(): v for k, v in EUFREQ_TERM_TO_UDATA.items()
102
127
  }
103
128
 
129
+ # Merge order matters: we want FREQ to win over EUFREQ
130
+ UDATA_FREQ_ID_TO_TERM = {v: k for k, v in {**EUFREQ_TERM_TO_UDATA, **FREQ_TERM_TO_UDATA}.items()}
104
131
 
105
- def temporal_to_rdf(
106
- daterange: db.DateRange, graph: Optional[Graph] = None
107
- ) -> Optional[RdfResource]:
132
+
133
+ def temporal_to_rdf(daterange: db.DateRange, graph: Graph | None = None) -> RdfResource | None:
108
134
  if not daterange:
109
135
  return
110
136
  graph = graph or Graph(namespace_manager=namespace_manager)
@@ -117,13 +143,11 @@ def temporal_to_rdf(
117
143
  return pot
118
144
 
119
145
 
120
- def frequency_to_rdf(frequency: str, graph: Optional[Graph] = None) -> Optional[str]:
121
- if not frequency:
122
- return
123
- return RDF_FREQUENCIES.get(frequency, getattr(FREQ, frequency))
146
+ def frequency_to_rdf(frequency: UpdateFrequency | None, graph: Graph | None = None) -> str | None:
147
+ return UDATA_FREQ_ID_TO_TERM.get(frequency)
124
148
 
125
149
 
126
- def owner_to_rdf(dataset: Dataset, graph: Optional[Graph] = None) -> Optional[RdfResource]:
150
+ def owner_to_rdf(dataset: Dataset, graph: Graph | None = None) -> RdfResource | None:
127
151
  from udata.core.organization.rdf import organization_to_rdf
128
152
  from udata.core.user.rdf import user_to_rdf
129
153
 
@@ -134,7 +158,7 @@ def owner_to_rdf(dataset: Dataset, graph: Optional[Graph] = None) -> Optional[Rd
134
158
  return
135
159
 
136
160
 
137
- def detect_ogc_service(resource: Resource) -> Optional[str]:
161
+ def detect_ogc_service(resource: Resource) -> str | None:
138
162
  """
139
163
  Detect if the resource points towards an OGC Service based on either
140
164
  * a known OGC Service format
@@ -153,8 +177,8 @@ def detect_ogc_service(resource: Resource) -> Optional[str]:
153
177
  def ogc_service_to_rdf(
154
178
  dataset: Dataset,
155
179
  resource: Resource,
156
- ogc_service_type: Optional[str] = None,
157
- graph: Optional[Graph] = None,
180
+ ogc_service_type: str | None = None,
181
+ graph: Graph | None = None,
158
182
  is_hvd: bool = False,
159
183
  ) -> RdfResource:
160
184
  """
@@ -196,8 +220,8 @@ def ogc_service_to_rdf(
196
220
 
197
221
  def resource_to_rdf(
198
222
  resource: Resource,
199
- dataset: Optional[Dataset] = None,
200
- graph: Optional[Graph] = None,
223
+ dataset: Dataset | None = None,
224
+ graph: Graph | None = None,
201
225
  is_hvd: bool = False,
202
226
  ) -> RdfResource:
203
227
  """
@@ -261,7 +285,7 @@ def dataset_to_graph_id(dataset: Dataset) -> URIRef | BNode:
261
285
  return BNode()
262
286
 
263
287
 
264
- def dataset_to_rdf(dataset: Dataset, graph: Optional[Graph] = None) -> RdfResource:
288
+ def dataset_to_rdf(dataset: Dataset, graph: Graph | None = None) -> RdfResource:
265
289
  """
266
290
  Map a dataset domain model to a DCAT/RDF graph
267
291
  """
@@ -336,8 +360,7 @@ def dataset_to_rdf(dataset: Dataset, graph: Optional[Graph] = None) -> RdfResour
336
360
  if dataset.temporal_coverage:
337
361
  d.set(DCT.temporal, temporal_to_rdf(dataset.temporal_coverage, graph))
338
362
 
339
- frequency = frequency_to_rdf(dataset.frequency)
340
- if frequency:
363
+ if frequency := frequency_to_rdf(dataset.frequency):
341
364
  d.set(DCT.accrualPeriodicity, frequency)
342
365
 
343
366
  owner_role = DCT.publisher
@@ -513,23 +536,19 @@ def spatial_from_rdf(graph):
513
536
  return None
514
537
 
515
538
 
516
- def frequency_from_rdf(term):
539
+ def frequency_from_rdf(term) -> UpdateFrequency | None:
517
540
  if isinstance(term, str):
518
541
  try:
519
542
  term = URIRef(uris.validate(term))
520
543
  except uris.ValidationError:
521
544
  pass
522
545
  if isinstance(term, Literal):
523
- if term.toPython().lower() in UPDATE_FREQUENCIES:
524
- return term.toPython().lower()
546
+ term = term.toPython().lower()
547
+ return FREQ_ID_TO_UDATA.get(term) or EUFREQ_ID_TO_UDATA.get(term)
525
548
  if isinstance(term, RdfResource):
526
549
  term = term.identifier
527
550
  if isinstance(term, URIRef):
528
- if EUFREQ in term:
529
- return EU_RDF_REQUENCIES.get(term)
530
- _, _, freq = namespace_manager.compute_qname(term)
531
- if freq.lower() in UPDATE_FREQUENCIES:
532
- return freq.lower()
551
+ return FREQ_TERM_TO_UDATA.get(term) or EUFREQ_TERM_TO_UDATA.get(term)
533
552
 
534
553
 
535
554
  def mime_from_rdf(resource):