udata 11.1.2.dev7__py3-none-any.whl → 11.1.2.dev11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of udata might be problematic. Click here for more details.
- udata/api/oauth2.py +22 -3
- udata/app.py +3 -0
- udata/auth/__init__.py +11 -0
- udata/auth/forms.py +70 -3
- udata/auth/mails.py +6 -0
- udata/auth/proconnect.py +127 -0
- udata/auth/views.py +57 -2
- udata/commands/db.py +2 -3
- udata/core/__init__.py +2 -0
- udata/core/captchetat.py +80 -0
- udata/core/dataservices/api.py +1 -2
- udata/core/dataset/api.py +3 -4
- udata/core/dataset/api_fields.py +3 -4
- udata/core/dataset/apiv2.py +6 -6
- udata/core/dataset/commands.py +0 -10
- udata/core/dataset/constants.py +124 -38
- udata/core/dataset/factories.py +2 -1
- udata/core/dataset/forms.py +14 -10
- udata/core/dataset/models.py +8 -36
- udata/core/dataset/preview.py +3 -3
- udata/core/dataset/rdf.py +84 -65
- udata/core/dataset/tasks.py +2 -50
- udata/core/metrics/helpers.py +6 -7
- udata/core/metrics/tasks.py +3 -6
- udata/core/post/api.py +1 -2
- udata/core/reuse/api.py +1 -2
- udata/core/user/api.py +1 -3
- udata/cors.py +19 -2
- udata/harvest/backends/ckan/harvesters.py +10 -14
- udata/harvest/backends/maaf.py +15 -14
- udata/harvest/tests/ckan/test_ckan_backend.py +4 -3
- udata/harvest/tests/test_dcat_backend.py +3 -2
- udata/i18n.py +7 -32
- udata/migrations/2025-01-05-dataservices-fields-changes.py +1 -2
- udata/migrations/2025-09-04-update-legacy-frequencies.py +36 -0
- udata/settings.py +27 -0
- udata/templates/security/email/reset_instructions.html +1 -1
- udata/templates/security/email/reset_instructions.txt +1 -1
- udata/tests/api/test_datasets_api.py +41 -12
- udata/tests/dataset/test_dataset_model.py +17 -53
- udata/tests/dataset/test_dataset_rdf.py +27 -28
- udata/translations/udata.pot +226 -150
- udata/uris.py +1 -2
- udata/utils.py +8 -1
- {udata-11.1.2.dev7.dist-info → udata-11.1.2.dev11.dist-info}/METADATA +3 -4
- {udata-11.1.2.dev7.dist-info → udata-11.1.2.dev11.dist-info}/RECORD +50 -50
- udata/templates/mail/frequency_reminder.html +0 -34
- udata/templates/mail/frequency_reminder.txt +0 -18
- udata/tests/test_i18n.py +0 -93
- {udata-11.1.2.dev7.dist-info → udata-11.1.2.dev11.dist-info}/WHEEL +0 -0
- {udata-11.1.2.dev7.dist-info → udata-11.1.2.dev11.dist-info}/entry_points.txt +0 -0
- {udata-11.1.2.dev7.dist-info → udata-11.1.2.dev11.dist-info}/licenses/LICENSE +0 -0
- {udata-11.1.2.dev7.dist-info → udata-11.1.2.dev11.dist-info}/top_level.txt +0 -0
udata/core/dataset/apiv2.py
CHANGED
|
@@ -30,7 +30,7 @@ from .api_fields import (
|
|
|
30
30
|
temporal_coverage_fields,
|
|
31
31
|
user_ref_fields,
|
|
32
32
|
)
|
|
33
|
-
from .constants import
|
|
33
|
+
from .constants import DEFAULT_LICENSE, FULL_OBJECTS_HEADER, UpdateFrequency
|
|
34
34
|
from .models import CommunityResource, Dataset
|
|
35
35
|
from .search import DatasetSearch
|
|
36
36
|
|
|
@@ -157,13 +157,13 @@ dataset_fields = apiv2.model(
|
|
|
157
157
|
),
|
|
158
158
|
"frequency": fields.Raw(
|
|
159
159
|
attribute=lambda d: {
|
|
160
|
-
"id": d.frequency or
|
|
161
|
-
"label":
|
|
160
|
+
"id": (d.frequency or UpdateFrequency.UNKNOWN).id,
|
|
161
|
+
"label": (d.frequency or UpdateFrequency.UNKNOWN).label,
|
|
162
162
|
}
|
|
163
163
|
if request.headers.get(FULL_OBJECTS_HEADER, False, bool)
|
|
164
|
-
else d.frequency,
|
|
165
|
-
enum=list(
|
|
166
|
-
default=
|
|
164
|
+
else (d.frequency or UpdateFrequency.UNKNOWN),
|
|
165
|
+
enum=list(UpdateFrequency),
|
|
166
|
+
default=UpdateFrequency.UNKNOWN,
|
|
167
167
|
required=True,
|
|
168
168
|
description="The update frequency (full Frequency object if `X-Get-Datasets-Full-Objects` is set, ID of the frequency otherwise)",
|
|
169
169
|
),
|
udata/core/dataset/commands.py
CHANGED
|
@@ -10,7 +10,6 @@ from udata.core.dataset.constants import DEFAULT_LICENSE
|
|
|
10
10
|
from udata.models import Dataset, License
|
|
11
11
|
|
|
12
12
|
from . import actions
|
|
13
|
-
from .tasks import send_frequency_reminder
|
|
14
13
|
|
|
15
14
|
log = logging.getLogger(__name__)
|
|
16
15
|
|
|
@@ -66,15 +65,6 @@ def licenses(source=DEFAULT_LICENSE_FILE):
|
|
|
66
65
|
success("Done")
|
|
67
66
|
|
|
68
67
|
|
|
69
|
-
@cli.command()
|
|
70
|
-
def frequency_reminder():
|
|
71
|
-
"""Send a unique email per organization to members
|
|
72
|
-
|
|
73
|
-
to remind them they have outdated datasets on the website.
|
|
74
|
-
"""
|
|
75
|
-
send_frequency_reminder()
|
|
76
|
-
|
|
77
|
-
|
|
78
68
|
@cli.group("dataset")
|
|
79
69
|
def grp():
|
|
80
70
|
"""Dataset related operations"""
|
udata/core/dataset/constants.py
CHANGED
|
@@ -1,48 +1,134 @@
|
|
|
1
1
|
from collections import OrderedDict
|
|
2
|
+
from datetime import datetime, timedelta
|
|
3
|
+
from enum import StrEnum, auto
|
|
4
|
+
|
|
5
|
+
from flask_babel import LazyString
|
|
2
6
|
|
|
3
7
|
from udata.i18n import lazy_gettext as _
|
|
4
8
|
|
|
5
|
-
#: Udata frequencies with their labels
|
|
6
|
-
#:
|
|
7
|
-
#: See: http://dublincore.org/groups/collections/frequency/
|
|
8
|
-
UPDATE_FREQUENCIES = OrderedDict(
|
|
9
|
-
[ # Dublin core equivalent
|
|
10
|
-
("unknown", _("Unknown")), # N/A
|
|
11
|
-
("punctual", _("Punctual")), # N/A
|
|
12
|
-
("continuous", _("Real time")), # freq:continuous
|
|
13
|
-
("hourly", _("Hourly")), # N/A
|
|
14
|
-
("fourTimesADay", _("Four times a day")), # N/A
|
|
15
|
-
("threeTimesADay", _("Three times a day")), # N/A
|
|
16
|
-
("semidaily", _("Semidaily")), # N/A
|
|
17
|
-
("daily", _("Daily")), # freq:daily
|
|
18
|
-
("fourTimesAWeek", _("Four times a week")), # N/A
|
|
19
|
-
("threeTimesAWeek", _("Three times a week")), # freq:threeTimesAWeek
|
|
20
|
-
("semiweekly", _("Semiweekly")), # freq:semiweekly
|
|
21
|
-
("weekly", _("Weekly")), # freq:weekly
|
|
22
|
-
("biweekly", _("Biweekly")), # freq:bimonthly
|
|
23
|
-
("threeTimesAMonth", _("Three times a month")), # freq:threeTimesAMonth
|
|
24
|
-
("semimonthly", _("Semimonthly")), # freq:semimonthly
|
|
25
|
-
("monthly", _("Monthly")), # freq:monthly
|
|
26
|
-
("bimonthly", _("Bimonthly")), # freq:bimonthly
|
|
27
|
-
("quarterly", _("Quarterly")), # freq:quarterly
|
|
28
|
-
("threeTimesAYear", _("Three times a year")), # freq:threeTimesAYear
|
|
29
|
-
("semiannual", _("Biannual")), # freq:semiannual
|
|
30
|
-
("annual", _("Annual")), # freq:annual
|
|
31
|
-
("biennial", _("Biennial")), # freq:biennial
|
|
32
|
-
("triennial", _("Triennial")), # freq:triennial
|
|
33
|
-
("quinquennial", _("Quinquennial")), # N/A
|
|
34
|
-
("irregular", _("Irregular")), # freq:irregular
|
|
35
|
-
]
|
|
36
|
-
)
|
|
37
9
|
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
10
|
+
class UpdateFrequency(StrEnum):
|
|
11
|
+
"""
|
|
12
|
+
Udata frequency vocabulary
|
|
13
|
+
|
|
14
|
+
Based on the following vocabularies:
|
|
15
|
+
- DC: http://dublincore.org/groups/collections/frequency/
|
|
16
|
+
- EU: https://publications.europa.eu/en/web/eu-vocabularies/at-dataset/-/resource/dataset/frequency
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
CONTINUOUS = auto(), _("Real time"), None # DC, EU:UPDATE_CONT
|
|
20
|
+
ONE_MINUTE = "oneMinute", _("Every minute"), timedelta(minutes=1) # EU:1MIN
|
|
21
|
+
FIVE_MINUTES = "fiveMinutes", _("Every five minutes"), timedelta(minutes=5) # EU:5MIN
|
|
22
|
+
TEN_MINUTES = "tenMinutes", _("Every ten minutes"), timedelta(minutes=10) # EU:10MIN
|
|
23
|
+
FIFTEEN_MINUTES = (
|
|
24
|
+
"fifteenMinutes",
|
|
25
|
+
_("Every fifteen minutes"),
|
|
26
|
+
timedelta(minutes=15),
|
|
27
|
+
) # EU:15MIN
|
|
28
|
+
THIRTY_MINUTES = "thirtyMinutes", _("Every thirty minute"), timedelta(minutes=30) # EU:30MIN
|
|
29
|
+
HOURLY = auto(), _("Every hour"), timedelta(hours=1) # EU
|
|
30
|
+
BIHOURLY = auto(), _("Every two hours"), timedelta(hours=2) # EU
|
|
31
|
+
TRIHOURLY = auto(), _("Every three hours"), timedelta(hours=3) # EU
|
|
32
|
+
TWELVE_HOURS = "twelveHours", _("Every twelve hours"), timedelta(hours=12) # EU:12HRS
|
|
33
|
+
SEVERAL_TIMES_A_DAY = "severalTimesADay", _("Several times a day"), timedelta(days=1) # EU:CONT
|
|
34
|
+
THREE_TIMES_A_DAY = "threeTimesADay", _("Three times a day"), timedelta(days=1) # EU:DAILY_3
|
|
35
|
+
SEMIDAILY = auto(), _("Twice a day"), timedelta(days=1) # EU:DAILY_2
|
|
36
|
+
DAILY = auto(), _("Daily"), timedelta(days=1) # DC, EU
|
|
37
|
+
FIVE_TIMES_A_WEEK = "fiveTimesAWeek", _("Five times a week"), timedelta(weeks=1) # EU:WEEKLY_5
|
|
38
|
+
THREE_TIMES_A_WEEK = (
|
|
39
|
+
"threeTimesAWeek",
|
|
40
|
+
_("Three times a week"),
|
|
41
|
+
timedelta(weeks=1),
|
|
42
|
+
) # DC, EU:WEEKLY_3
|
|
43
|
+
SEMIWEEKLY = auto(), _("Twice a week"), timedelta(weeks=1) # DC, EU:WEEKLY_2
|
|
44
|
+
WEEKLY = auto(), _("Weekly"), timedelta(weeks=1) # DC, EU
|
|
45
|
+
BIWEEKLY = auto(), _("Every two weeks"), timedelta(weeks=2) # DC, EU
|
|
46
|
+
THREE_TIMES_A_MONTH = (
|
|
47
|
+
"threeTimesAMonth",
|
|
48
|
+
_("Three times a month"),
|
|
49
|
+
timedelta(days=31),
|
|
50
|
+
) # DC, EU:MONTHLY_3
|
|
51
|
+
SEMIMONTHLY = auto(), _("Twice a month"), timedelta(days=31) # DC, EU:MONTHLY_2
|
|
52
|
+
MONTHLY = auto(), _("Monthly"), timedelta(days=31) # DC, EU
|
|
53
|
+
BIMONTHLY = auto(), _("Every two months"), timedelta(days=31 * 2) # DC, EU
|
|
54
|
+
QUARTERLY = auto(), _("Quarterly"), timedelta(days=31 * 3) # DC, EU
|
|
55
|
+
THREE_TIMES_A_YEAR = (
|
|
56
|
+
"threeTimesAYear",
|
|
57
|
+
_("Three times a year"),
|
|
58
|
+
timedelta(days=365),
|
|
59
|
+
) # DC, EU:ANNUAL_3
|
|
60
|
+
SEMIANNUAL = auto(), _("Twice a year"), timedelta(days=365) # DC, EU:ANNUAL_2
|
|
61
|
+
ANNUAL = auto(), _("Annually"), timedelta(days=365) # DC, EU
|
|
62
|
+
BIENNIAL = auto(), _("Every two years"), timedelta(days=365 * 2) # DC, EU
|
|
63
|
+
TRIENNIAL = auto(), _("Every three years"), timedelta(days=365 * 3) # DC, EU
|
|
64
|
+
QUADRENNIAL = auto(), _("Every four years"), timedelta(days=365 * 4) # EU
|
|
65
|
+
QUINQUENNIAL = auto(), _("Every five years"), timedelta(days=365 * 5) # EU
|
|
66
|
+
DECENNIAL = auto(), _("Every ten years"), timedelta(days=365 * 10) # EU
|
|
67
|
+
BIDECENNIAL = auto(), _("Every twenty years"), timedelta(days=365 * 20) # EU
|
|
68
|
+
TRIDECENNIAL = auto(), _("Every thirty years"), timedelta(days=365 * 30) # EU
|
|
69
|
+
PUNCTUAL = auto(), _("Punctual"), None # EU:AS_NEEDED
|
|
70
|
+
IRREGULAR = auto(), _("Irregular"), None # DC, EU:IRREG
|
|
71
|
+
NEVER = auto(), _("Never"), None # EU
|
|
72
|
+
NOT_PLANNED = "notPlanned", _("Not planned"), None # EU:NOT_PLANNED
|
|
73
|
+
OTHER = auto(), _("Other"), None # EU
|
|
74
|
+
UNKNOWN = auto(), _("Unknown"), None # EU
|
|
75
|
+
|
|
76
|
+
def __new__(cls, id: str, label: LazyString, delta: timedelta | None):
|
|
77
|
+
# Set _value_ so the enum value-based lookup depends only on the id field.
|
|
78
|
+
# See https://docs.python.org/3/howto/enum.html#when-to-use-new-vs-init
|
|
79
|
+
obj = str.__new__(cls, id)
|
|
80
|
+
obj._value_ = id
|
|
81
|
+
obj._label = label # type: ignore[misc]
|
|
82
|
+
obj._delta = delta # type: ignore[misc]
|
|
83
|
+
return obj
|
|
84
|
+
|
|
85
|
+
@classmethod
|
|
86
|
+
def _missing_(cls, value) -> "UpdateFrequency | None":
|
|
87
|
+
if isinstance(value, str):
|
|
88
|
+
return UpdateFrequency._LEGACY_FREQUENCIES.get(value) # type: ignore[misc]
|
|
89
|
+
|
|
90
|
+
@property
|
|
91
|
+
def id(self) -> str:
|
|
92
|
+
return self.value
|
|
93
|
+
|
|
94
|
+
@property
|
|
95
|
+
def label(self) -> LazyString:
|
|
96
|
+
return self._label # type: ignore[misc]
|
|
97
|
+
|
|
98
|
+
@property
|
|
99
|
+
def delta(self) -> timedelta | None:
|
|
100
|
+
return self._delta # type: ignore[misc]
|
|
101
|
+
|
|
102
|
+
def next_update(self, last_update: datetime) -> datetime | None:
|
|
103
|
+
return last_update + self.delta if self.delta else None
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
# We must declare UpdateFrequency class variables after the Enum magic
|
|
107
|
+
# happens, so outside of class declaration.
|
|
108
|
+
#
|
|
109
|
+
# The alternative method based on _ignore_ breaks accessing the class
|
|
110
|
+
# variables from outside the class, because accesses to them will go
|
|
111
|
+
# through __getattr__ as if it were an Enum entry.
|
|
112
|
+
#
|
|
113
|
+
# FIXME(python 3.13+): Use Enum._add_value_alias_ instead:
|
|
114
|
+
#
|
|
115
|
+
# UNKNOWN = auto(), _("Unknown"), None, []
|
|
116
|
+
# CONTINUOUS = auto(), _("Real time"), None, ["realtime"]
|
|
117
|
+
# SEVERAL_TIMES_A_DAY = "severalTimesADay", ..., ["fourTimesADay"]
|
|
118
|
+
#
|
|
119
|
+
# def __new__(cls, id: str, ..., aliases: list[str]):
|
|
120
|
+
# ...
|
|
121
|
+
# for alias in aliases:
|
|
122
|
+
# obj._add_value_alias_(alias)
|
|
123
|
+
#
|
|
124
|
+
UpdateFrequency._LEGACY_FREQUENCIES = { # type: ignore[misc]
|
|
125
|
+
"realtime": UpdateFrequency.CONTINUOUS,
|
|
126
|
+
"fourTimesADay": UpdateFrequency.SEVERAL_TIMES_A_DAY,
|
|
127
|
+
"fourTimesAWeek": UpdateFrequency.OTHER,
|
|
128
|
+
"fortnighly": UpdateFrequency.BIWEEKLY,
|
|
129
|
+
"biannual": UpdateFrequency.SEMIANNUAL,
|
|
43
130
|
}
|
|
44
131
|
|
|
45
|
-
DEFAULT_FREQUENCY = "unknown"
|
|
46
132
|
|
|
47
133
|
DEFAULT_LICENSE = {
|
|
48
134
|
"id": "notspecified",
|
udata/core/dataset/factories.py
CHANGED
|
@@ -8,6 +8,7 @@ from udata.core.organization.factories import OrganizationFactory
|
|
|
8
8
|
from udata.core.spatial.factories import SpatialCoverageFactory
|
|
9
9
|
from udata.factories import ModelFactory
|
|
10
10
|
|
|
11
|
+
from .constants import UpdateFrequency
|
|
11
12
|
from .models import Checksum, CommunityResource, Dataset, License, Resource
|
|
12
13
|
|
|
13
14
|
|
|
@@ -17,7 +18,7 @@ class DatasetFactory(ModelFactory):
|
|
|
17
18
|
|
|
18
19
|
title = factory.Faker("sentence")
|
|
19
20
|
description = factory.Faker("text")
|
|
20
|
-
frequency =
|
|
21
|
+
frequency = UpdateFrequency.UNKNOWN
|
|
21
22
|
resources = factory.LazyAttribute(lambda o: ResourceFactory.build_batch(o.nb_resources))
|
|
22
23
|
|
|
23
24
|
class Params:
|
udata/core/dataset/forms.py
CHANGED
|
@@ -6,14 +6,12 @@ from udata.mongo.errors import FieldValidationError
|
|
|
6
6
|
|
|
7
7
|
from .constants import (
|
|
8
8
|
CHECKSUM_TYPES,
|
|
9
|
-
DEFAULT_FREQUENCY,
|
|
10
9
|
DESCRIPTION_SHORT_SIZE_LIMIT,
|
|
11
10
|
DESCRIPTION_SIZE_LIMIT,
|
|
12
|
-
LEGACY_FREQUENCIES,
|
|
13
11
|
RESOURCE_FILETYPES,
|
|
14
12
|
RESOURCE_TYPES,
|
|
15
13
|
TITLE_SIZE_LIMIT,
|
|
16
|
-
|
|
14
|
+
UpdateFrequency,
|
|
17
15
|
)
|
|
18
16
|
from .models import (
|
|
19
17
|
Checksum,
|
|
@@ -117,10 +115,11 @@ class CommunityResourceForm(BaseResourceForm):
|
|
|
117
115
|
organization = fields.PublishAsField(_("Publish as"))
|
|
118
116
|
|
|
119
117
|
|
|
120
|
-
def
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
118
|
+
def unmarshal_frequency(form, field):
|
|
119
|
+
# We don't need to worry about invalid field.data being fed to UpdateFrequency here,
|
|
120
|
+
# since the API will already have ensured incoming data matches the field definition,
|
|
121
|
+
# which in our case is an enum of valid UpdateFrequency values.
|
|
122
|
+
field.data = UpdateFrequency(field.data)
|
|
124
123
|
|
|
125
124
|
|
|
126
125
|
def validate_contact_point(form, field):
|
|
@@ -160,10 +159,15 @@ class DatasetForm(ModelForm):
|
|
|
160
159
|
license = fields.ModelSelectField(_("License"), model=License, allow_blank=True)
|
|
161
160
|
frequency = fields.SelectField(
|
|
162
161
|
_("Update frequency"),
|
|
163
|
-
choices=list(
|
|
164
|
-
default=
|
|
162
|
+
choices=list(UpdateFrequency),
|
|
163
|
+
default=UpdateFrequency.UNKNOWN,
|
|
165
164
|
validators=[validators.optional()],
|
|
166
|
-
|
|
165
|
+
# Unmarshaling should not happen during validation, but flask-restx makes it cumbersome
|
|
166
|
+
# to do it earlier, requiring a request parser (unmarshaler) separate from the marshaler,
|
|
167
|
+
# meaning we can't use the same object for @api.expect and @api.marshal_with.
|
|
168
|
+
# This should get better once flask-restx moves to something like marshmallow, which
|
|
169
|
+
# handles marshaling/unmarshaling more symmetrically and in the same object.
|
|
170
|
+
preprocessors=[unmarshal_frequency],
|
|
167
171
|
description=_("The frequency at which data are updated."),
|
|
168
172
|
)
|
|
169
173
|
frequency_date = fields.DateTimeField(_("Expected frequency date"))
|
udata/core/dataset/models.py
CHANGED
|
@@ -38,7 +38,6 @@ from .constants import (
|
|
|
38
38
|
DESCRIPTION_SHORT_SIZE_LIMIT,
|
|
39
39
|
HVD,
|
|
40
40
|
INSPIRE,
|
|
41
|
-
LEGACY_FREQUENCIES,
|
|
42
41
|
MAX_DISTANCE,
|
|
43
42
|
PIVOTAL_DATA,
|
|
44
43
|
RESOURCE_FILETYPES,
|
|
@@ -47,7 +46,7 @@ from .constants import (
|
|
|
47
46
|
SL,
|
|
48
47
|
SPD,
|
|
49
48
|
SR,
|
|
50
|
-
|
|
49
|
+
UpdateFrequency,
|
|
51
50
|
)
|
|
52
51
|
from .exceptions import (
|
|
53
52
|
SchemasCacheUnavailableException,
|
|
@@ -580,7 +579,8 @@ class Dataset(Auditable, WithMetrics, DatasetBadgeMixin, Owned, Linkable, db.Doc
|
|
|
580
579
|
resources = field(db.ListField(db.EmbeddedDocumentField(Resource)), auditable=False)
|
|
581
580
|
|
|
582
581
|
private = field(db.BooleanField(default=False))
|
|
583
|
-
|
|
582
|
+
|
|
583
|
+
frequency = field(db.EnumField(UpdateFrequency))
|
|
584
584
|
frequency_date = field(db.DateTimeField(verbose_name=_("Future date of update")))
|
|
585
585
|
temporal_coverage = field(db.EmbeddedDocumentField(db.DateRange))
|
|
586
586
|
spatial = field(db.EmbeddedDocumentField(SpatialCoverage))
|
|
@@ -703,8 +703,6 @@ class Dataset(Auditable, WithMetrics, DatasetBadgeMixin, Owned, Linkable, db.Doc
|
|
|
703
703
|
|
|
704
704
|
def clean(self):
|
|
705
705
|
super(Dataset, self).clean()
|
|
706
|
-
if self.frequency in LEGACY_FREQUENCIES:
|
|
707
|
-
self.frequency = LEGACY_FREQUENCIES[self.frequency]
|
|
708
706
|
|
|
709
707
|
if len(set(res.id for res in self.resources)) != len(self.resources):
|
|
710
708
|
raise MongoEngineValidationError(f"Duplicate resource ID in dataset #{self.id}.")
|
|
@@ -782,8 +780,8 @@ class Dataset(Auditable, WithMetrics, DatasetBadgeMixin, Owned, Linkable, db.Doc
|
|
|
782
780
|
return self.owner.avatar.url
|
|
783
781
|
|
|
784
782
|
@property
|
|
785
|
-
def
|
|
786
|
-
return
|
|
783
|
+
def has_frequency(self):
|
|
784
|
+
return self.frequency not in [None, UpdateFrequency.UNKNOWN]
|
|
787
785
|
|
|
788
786
|
def check_availability(self):
|
|
789
787
|
"""Check if resources from that dataset are available.
|
|
@@ -835,33 +833,7 @@ class Dataset(Auditable, WithMetrics, DatasetBadgeMixin, Owned, Linkable, db.Doc
|
|
|
835
833
|
Ex: the next update for a threeTimesADay freq is not
|
|
836
834
|
every 8 hours, but is maximum 24 hours later.
|
|
837
835
|
"""
|
|
838
|
-
|
|
839
|
-
if self.frequency == "hourly":
|
|
840
|
-
delta = timedelta(hours=1)
|
|
841
|
-
elif self.frequency in ["fourTimesADay", "threeTimesADay", "semidaily", "daily"]:
|
|
842
|
-
delta = timedelta(days=1)
|
|
843
|
-
elif self.frequency in ["fourTimesAWeek", "threeTimesAWeek", "semiweekly", "weekly"]:
|
|
844
|
-
delta = timedelta(weeks=1)
|
|
845
|
-
elif self.frequency == "biweekly":
|
|
846
|
-
delta = timedelta(weeks=2)
|
|
847
|
-
elif self.frequency in ["threeTimesAMonth", "semimonthly", "monthly"]:
|
|
848
|
-
delta = timedelta(days=31)
|
|
849
|
-
elif self.frequency == "bimonthly":
|
|
850
|
-
delta = timedelta(days=31 * 2)
|
|
851
|
-
elif self.frequency == "quarterly":
|
|
852
|
-
delta = timedelta(days=365 / 4)
|
|
853
|
-
elif self.frequency in ["threeTimesAYear", "semiannual", "annual"]:
|
|
854
|
-
delta = timedelta(days=365)
|
|
855
|
-
elif self.frequency == "biennial":
|
|
856
|
-
delta = timedelta(days=365 * 2)
|
|
857
|
-
elif self.frequency == "triennial":
|
|
858
|
-
delta = timedelta(days=365 * 3)
|
|
859
|
-
elif self.frequency == "quinquennial":
|
|
860
|
-
delta = timedelta(days=365 * 5)
|
|
861
|
-
if delta is None:
|
|
862
|
-
return
|
|
863
|
-
else:
|
|
864
|
-
return self.last_update + delta
|
|
836
|
+
return self.frequency.next_update(self.last_update) if self.has_frequency else None
|
|
865
837
|
|
|
866
838
|
@property
|
|
867
839
|
def quality(self):
|
|
@@ -880,7 +852,7 @@ class Dataset(Auditable, WithMetrics, DatasetBadgeMixin, Owned, Linkable, db.Doc
|
|
|
880
852
|
# Allow for being one day late on update.
|
|
881
853
|
# We may have up to one day delay due to harvesting for example
|
|
882
854
|
quality["update_fulfilled_in_time"] = (next_update - datetime.utcnow()).days >= -1
|
|
883
|
-
elif self.
|
|
855
|
+
elif self.has_frequency and self.frequency.delta is None:
|
|
884
856
|
# For these frequencies, we don't expect regular updates or can't quantify them.
|
|
885
857
|
# Thus we consider the update_fulfilled_in_time quality criterion to be true.
|
|
886
858
|
quality["update_fulfilled_in_time"] = True
|
|
@@ -905,7 +877,7 @@ class Dataset(Auditable, WithMetrics, DatasetBadgeMixin, Owned, Linkable, db.Doc
|
|
|
905
877
|
result["temporal_coverage"] = True if self.temporal_coverage else False
|
|
906
878
|
result["spatial"] = True if self.spatial else False
|
|
907
879
|
|
|
908
|
-
result["update_frequency"] = self.
|
|
880
|
+
result["update_frequency"] = self.has_frequency
|
|
909
881
|
|
|
910
882
|
# We only save the next_update here because it is based on resources
|
|
911
883
|
# We cannot save the `update_fulfilled_in_time` because it is time
|
udata/core/dataset/preview.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
from abc import ABC, abstractmethod
|
|
4
|
-
from typing import TYPE_CHECKING
|
|
4
|
+
from typing import TYPE_CHECKING
|
|
5
5
|
|
|
6
6
|
from flask import current_app
|
|
7
7
|
|
|
@@ -12,12 +12,12 @@ if TYPE_CHECKING:
|
|
|
12
12
|
# Define an abstract class
|
|
13
13
|
class Preview(ABC):
|
|
14
14
|
@abstractmethod
|
|
15
|
-
def preview_url(self, resource: Resource) ->
|
|
15
|
+
def preview_url(self, resource: Resource) -> str | None:
|
|
16
16
|
return None
|
|
17
17
|
|
|
18
18
|
|
|
19
19
|
class TabularAPIPreview(Preview):
|
|
20
|
-
def preview_url(self, resource: Resource) ->
|
|
20
|
+
def preview_url(self, resource: Resource) -> str | None:
|
|
21
21
|
preview_base_url = current_app.config["TABULAR_EXPLORE_URL"]
|
|
22
22
|
if not preview_base_url:
|
|
23
23
|
return None
|
udata/core/dataset/rdf.py
CHANGED
|
@@ -6,7 +6,6 @@ import calendar
|
|
|
6
6
|
import json
|
|
7
7
|
import logging
|
|
8
8
|
from datetime import date, datetime
|
|
9
|
-
from typing import Optional
|
|
10
9
|
|
|
11
10
|
from dateutil.parser import parse as parse_dt
|
|
12
11
|
from flask import current_app
|
|
@@ -53,58 +52,85 @@ from udata.rdf import (
|
|
|
53
52
|
)
|
|
54
53
|
from udata.utils import get_by, safe_unicode, to_naive_datetime
|
|
55
54
|
|
|
56
|
-
from .constants import OGC_SERVICE_FORMATS,
|
|
55
|
+
from .constants import OGC_SERVICE_FORMATS, UpdateFrequency
|
|
57
56
|
from .models import Checksum, Dataset, License, Resource
|
|
58
57
|
|
|
59
58
|
log = logging.getLogger(__name__)
|
|
60
59
|
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
60
|
+
FREQ_TERM_TO_UDATA = {
|
|
61
|
+
FREQ.continuous: UpdateFrequency.CONTINUOUS,
|
|
62
|
+
FREQ.daily: UpdateFrequency.DAILY,
|
|
63
|
+
FREQ.threeTimesAWeek: UpdateFrequency.THREE_TIMES_A_WEEK,
|
|
64
|
+
FREQ.semiweekly: UpdateFrequency.SEMIWEEKLY,
|
|
65
|
+
FREQ.weekly: UpdateFrequency.WEEKLY,
|
|
66
|
+
FREQ.biweekly: UpdateFrequency.BIWEEKLY,
|
|
67
|
+
FREQ.threeTimesAMonth: UpdateFrequency.THREE_TIMES_A_MONTH,
|
|
68
|
+
FREQ.semimonthly: UpdateFrequency.SEMIMONTHLY,
|
|
69
|
+
FREQ.monthly: UpdateFrequency.MONTHLY,
|
|
70
|
+
FREQ.bimonthly: UpdateFrequency.BIMONTHLY,
|
|
71
|
+
FREQ.quarterly: UpdateFrequency.QUARTERLY,
|
|
72
|
+
FREQ.threeTimesAYear: UpdateFrequency.THREE_TIMES_A_YEAR,
|
|
73
|
+
FREQ.semiannual: UpdateFrequency.SEMIANNUAL,
|
|
74
|
+
FREQ.annual: UpdateFrequency.ANNUAL,
|
|
75
|
+
FREQ.biennial: UpdateFrequency.BIENNIAL,
|
|
76
|
+
FREQ.triennial: UpdateFrequency.TRIENNIAL,
|
|
77
|
+
FREQ.irregular: UpdateFrequency.IRREGULAR,
|
|
78
|
+
}
|
|
79
|
+
FREQ_ID_TO_UDATA = {
|
|
80
|
+
namespace_manager.compute_qname(k)[2].lower(): v for k, v in FREQ_TERM_TO_UDATA.items()
|
|
71
81
|
}
|
|
72
82
|
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
EUFREQ
|
|
80
|
-
EUFREQ
|
|
81
|
-
EUFREQ.
|
|
82
|
-
EUFREQ.
|
|
83
|
-
EUFREQ.
|
|
84
|
-
EUFREQ
|
|
85
|
-
EUFREQ.
|
|
86
|
-
EUFREQ.
|
|
87
|
-
EUFREQ.
|
|
88
|
-
|
|
89
|
-
EUFREQ.
|
|
90
|
-
EUFREQ.
|
|
91
|
-
EUFREQ.
|
|
92
|
-
EUFREQ.
|
|
93
|
-
EUFREQ.
|
|
94
|
-
EUFREQ.
|
|
95
|
-
EUFREQ.
|
|
96
|
-
EUFREQ.
|
|
97
|
-
EUFREQ.
|
|
98
|
-
EUFREQ.
|
|
99
|
-
EUFREQ.
|
|
100
|
-
EUFREQ.
|
|
101
|
-
EUFREQ.
|
|
83
|
+
EUFREQ_TERM_TO_UDATA = {
|
|
84
|
+
EUFREQ.UNKNOWN: UpdateFrequency.UNKNOWN,
|
|
85
|
+
EUFREQ.UPDATE_CONT: UpdateFrequency.CONTINUOUS,
|
|
86
|
+
getattr(EUFREQ, "1MIN"): UpdateFrequency.ONE_MINUTE,
|
|
87
|
+
getattr(EUFREQ, "5MIN"): UpdateFrequency.FIVE_MINUTES,
|
|
88
|
+
getattr(EUFREQ, "10MIN"): UpdateFrequency.TEN_MINUTES,
|
|
89
|
+
getattr(EUFREQ, "15MIN"): UpdateFrequency.FIFTEEN_MINUTES,
|
|
90
|
+
getattr(EUFREQ, "30MIN"): UpdateFrequency.THIRTY_MINUTES,
|
|
91
|
+
EUFREQ.HOURLY: UpdateFrequency.HOURLY,
|
|
92
|
+
EUFREQ.BIHOURLY: UpdateFrequency.BIHOURLY,
|
|
93
|
+
EUFREQ.TRIHOURLY: UpdateFrequency.TRIHOURLY,
|
|
94
|
+
getattr(EUFREQ, "12HRS"): UpdateFrequency.TWELVE_HOURS,
|
|
95
|
+
EUFREQ.CONT: UpdateFrequency.SEVERAL_TIMES_A_DAY,
|
|
96
|
+
EUFREQ.DAILY_3: UpdateFrequency.THREE_TIMES_A_DAY,
|
|
97
|
+
EUFREQ.DAILY_2: UpdateFrequency.SEMIDAILY,
|
|
98
|
+
EUFREQ.DAILY: UpdateFrequency.DAILY,
|
|
99
|
+
EUFREQ.WEEKLY_5: UpdateFrequency.FIVE_TIMES_A_WEEK,
|
|
100
|
+
EUFREQ.WEEKLY_3: UpdateFrequency.THREE_TIMES_A_WEEK,
|
|
101
|
+
EUFREQ.WEEKLY_2: UpdateFrequency.SEMIWEEKLY,
|
|
102
|
+
EUFREQ.WEEKLY: UpdateFrequency.WEEKLY,
|
|
103
|
+
EUFREQ.BIWEEKLY: UpdateFrequency.BIWEEKLY,
|
|
104
|
+
EUFREQ.MONTHLY_3: UpdateFrequency.THREE_TIMES_A_MONTH,
|
|
105
|
+
EUFREQ.MONTHLY_2: UpdateFrequency.SEMIMONTHLY,
|
|
106
|
+
EUFREQ.MONTHLY: UpdateFrequency.MONTHLY,
|
|
107
|
+
EUFREQ.BIMONTHLY: UpdateFrequency.BIMONTHLY,
|
|
108
|
+
EUFREQ.QUARTERLY: UpdateFrequency.QUARTERLY,
|
|
109
|
+
EUFREQ.ANNUAL_3: UpdateFrequency.THREE_TIMES_A_YEAR,
|
|
110
|
+
EUFREQ.ANNUAL_2: UpdateFrequency.SEMIANNUAL,
|
|
111
|
+
EUFREQ.ANNUAL: UpdateFrequency.ANNUAL,
|
|
112
|
+
EUFREQ.BIENNIAL: UpdateFrequency.BIENNIAL,
|
|
113
|
+
EUFREQ.TRIENNIAL: UpdateFrequency.TRIENNIAL,
|
|
114
|
+
EUFREQ.QUADRENNIAL: UpdateFrequency.QUADRENNIAL,
|
|
115
|
+
EUFREQ.QUINQUENNIAL: UpdateFrequency.QUINQUENNIAL,
|
|
116
|
+
EUFREQ.DECENNIAL: UpdateFrequency.DECENNIAL,
|
|
117
|
+
EUFREQ.BIDECENNIAL: UpdateFrequency.BIDECENNIAL,
|
|
118
|
+
EUFREQ.TRIDECENNIAL: UpdateFrequency.TRIDECENNIAL,
|
|
119
|
+
EUFREQ.AS_NEEDED: UpdateFrequency.PUNCTUAL,
|
|
120
|
+
EUFREQ.IRREG: UpdateFrequency.IRREGULAR,
|
|
121
|
+
EUFREQ.NEVER: UpdateFrequency.NEVER,
|
|
122
|
+
EUFREQ.NOT_PLANNED: UpdateFrequency.NOT_PLANNED,
|
|
123
|
+
EUFREQ.OTHER: UpdateFrequency.OTHER,
|
|
124
|
+
}
|
|
125
|
+
EUFREQ_ID_TO_UDATA = {
|
|
126
|
+
namespace_manager.compute_qname(k)[2].lower(): v for k, v in EUFREQ_TERM_TO_UDATA.items()
|
|
102
127
|
}
|
|
103
128
|
|
|
129
|
+
# Merge order matters: we want FREQ to win over EUFREQ
|
|
130
|
+
UDATA_FREQ_ID_TO_TERM = {v: k for k, v in {**EUFREQ_TERM_TO_UDATA, **FREQ_TERM_TO_UDATA}.items()}
|
|
104
131
|
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
) -> Optional[RdfResource]:
|
|
132
|
+
|
|
133
|
+
def temporal_to_rdf(daterange: db.DateRange, graph: Graph | None = None) -> RdfResource | None:
|
|
108
134
|
if not daterange:
|
|
109
135
|
return
|
|
110
136
|
graph = graph or Graph(namespace_manager=namespace_manager)
|
|
@@ -117,13 +143,11 @@ def temporal_to_rdf(
|
|
|
117
143
|
return pot
|
|
118
144
|
|
|
119
145
|
|
|
120
|
-
def frequency_to_rdf(frequency:
|
|
121
|
-
|
|
122
|
-
return
|
|
123
|
-
return RDF_FREQUENCIES.get(frequency, getattr(FREQ, frequency))
|
|
146
|
+
def frequency_to_rdf(frequency: UpdateFrequency | None, graph: Graph | None = None) -> str | None:
|
|
147
|
+
return UDATA_FREQ_ID_TO_TERM.get(frequency)
|
|
124
148
|
|
|
125
149
|
|
|
126
|
-
def owner_to_rdf(dataset: Dataset, graph:
|
|
150
|
+
def owner_to_rdf(dataset: Dataset, graph: Graph | None = None) -> RdfResource | None:
|
|
127
151
|
from udata.core.organization.rdf import organization_to_rdf
|
|
128
152
|
from udata.core.user.rdf import user_to_rdf
|
|
129
153
|
|
|
@@ -134,7 +158,7 @@ def owner_to_rdf(dataset: Dataset, graph: Optional[Graph] = None) -> Optional[Rd
|
|
|
134
158
|
return
|
|
135
159
|
|
|
136
160
|
|
|
137
|
-
def detect_ogc_service(resource: Resource) ->
|
|
161
|
+
def detect_ogc_service(resource: Resource) -> str | None:
|
|
138
162
|
"""
|
|
139
163
|
Detect if the resource points towards an OGC Service based on either
|
|
140
164
|
* a known OGC Service format
|
|
@@ -153,8 +177,8 @@ def detect_ogc_service(resource: Resource) -> Optional[str]:
|
|
|
153
177
|
def ogc_service_to_rdf(
|
|
154
178
|
dataset: Dataset,
|
|
155
179
|
resource: Resource,
|
|
156
|
-
ogc_service_type:
|
|
157
|
-
graph:
|
|
180
|
+
ogc_service_type: str | None = None,
|
|
181
|
+
graph: Graph | None = None,
|
|
158
182
|
is_hvd: bool = False,
|
|
159
183
|
) -> RdfResource:
|
|
160
184
|
"""
|
|
@@ -196,8 +220,8 @@ def ogc_service_to_rdf(
|
|
|
196
220
|
|
|
197
221
|
def resource_to_rdf(
|
|
198
222
|
resource: Resource,
|
|
199
|
-
dataset:
|
|
200
|
-
graph:
|
|
223
|
+
dataset: Dataset | None = None,
|
|
224
|
+
graph: Graph | None = None,
|
|
201
225
|
is_hvd: bool = False,
|
|
202
226
|
) -> RdfResource:
|
|
203
227
|
"""
|
|
@@ -261,7 +285,7 @@ def dataset_to_graph_id(dataset: Dataset) -> URIRef | BNode:
|
|
|
261
285
|
return BNode()
|
|
262
286
|
|
|
263
287
|
|
|
264
|
-
def dataset_to_rdf(dataset: Dataset, graph:
|
|
288
|
+
def dataset_to_rdf(dataset: Dataset, graph: Graph | None = None) -> RdfResource:
|
|
265
289
|
"""
|
|
266
290
|
Map a dataset domain model to a DCAT/RDF graph
|
|
267
291
|
"""
|
|
@@ -336,8 +360,7 @@ def dataset_to_rdf(dataset: Dataset, graph: Optional[Graph] = None) -> RdfResour
|
|
|
336
360
|
if dataset.temporal_coverage:
|
|
337
361
|
d.set(DCT.temporal, temporal_to_rdf(dataset.temporal_coverage, graph))
|
|
338
362
|
|
|
339
|
-
frequency
|
|
340
|
-
if frequency:
|
|
363
|
+
if frequency := frequency_to_rdf(dataset.frequency):
|
|
341
364
|
d.set(DCT.accrualPeriodicity, frequency)
|
|
342
365
|
|
|
343
366
|
owner_role = DCT.publisher
|
|
@@ -513,23 +536,19 @@ def spatial_from_rdf(graph):
|
|
|
513
536
|
return None
|
|
514
537
|
|
|
515
538
|
|
|
516
|
-
def frequency_from_rdf(term):
|
|
539
|
+
def frequency_from_rdf(term) -> UpdateFrequency | None:
|
|
517
540
|
if isinstance(term, str):
|
|
518
541
|
try:
|
|
519
542
|
term = URIRef(uris.validate(term))
|
|
520
543
|
except uris.ValidationError:
|
|
521
544
|
pass
|
|
522
545
|
if isinstance(term, Literal):
|
|
523
|
-
|
|
524
|
-
|
|
546
|
+
term = term.toPython().lower()
|
|
547
|
+
return FREQ_ID_TO_UDATA.get(term) or EUFREQ_ID_TO_UDATA.get(term)
|
|
525
548
|
if isinstance(term, RdfResource):
|
|
526
549
|
term = term.identifier
|
|
527
550
|
if isinstance(term, URIRef):
|
|
528
|
-
|
|
529
|
-
return EU_RDF_REQUENCIES.get(term)
|
|
530
|
-
_, _, freq = namespace_manager.compute_qname(term)
|
|
531
|
-
if freq.lower() in UPDATE_FREQUENCIES:
|
|
532
|
-
return freq.lower()
|
|
551
|
+
return FREQ_TERM_TO_UDATA.get(term) or EUFREQ_TERM_TO_UDATA.get(term)
|
|
533
552
|
|
|
534
553
|
|
|
535
554
|
def mime_from_rdf(resource):
|