udata 11.1.2.dev8__py3-none-any.whl → 11.1.2.dev11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of udata might be problematic. Click here for more details.
- udata/api/oauth2.py +22 -3
- udata/app.py +3 -0
- udata/auth/__init__.py +11 -0
- udata/auth/forms.py +70 -3
- udata/auth/mails.py +6 -0
- udata/auth/proconnect.py +127 -0
- udata/auth/views.py +57 -2
- udata/core/__init__.py +2 -0
- udata/core/captchetat.py +80 -0
- udata/core/dataset/api.py +2 -2
- udata/core/dataset/api_fields.py +3 -4
- udata/core/dataset/apiv2.py +6 -6
- udata/core/dataset/commands.py +0 -10
- udata/core/dataset/constants.py +124 -38
- udata/core/dataset/factories.py +2 -1
- udata/core/dataset/forms.py +14 -10
- udata/core/dataset/models.py +8 -36
- udata/core/dataset/rdf.py +76 -54
- udata/core/dataset/tasks.py +2 -50
- udata/cors.py +19 -2
- udata/harvest/backends/ckan/harvesters.py +10 -14
- udata/harvest/backends/maaf.py +15 -14
- udata/harvest/tests/ckan/test_ckan_backend.py +4 -3
- udata/harvest/tests/test_dcat_backend.py +3 -2
- udata/i18n.py +7 -32
- udata/migrations/2025-09-04-update-legacy-frequencies.py +36 -0
- udata/settings.py +27 -0
- udata/templates/security/email/reset_instructions.html +1 -1
- udata/templates/security/email/reset_instructions.txt +1 -1
- udata/tests/api/test_datasets_api.py +41 -12
- udata/tests/dataset/test_dataset_model.py +17 -53
- udata/tests/dataset/test_dataset_rdf.py +27 -28
- udata/translations/udata.pot +226 -150
- udata/utils.py +8 -1
- {udata-11.1.2.dev8.dist-info → udata-11.1.2.dev11.dist-info}/METADATA +1 -1
- {udata-11.1.2.dev8.dist-info → udata-11.1.2.dev11.dist-info}/RECORD +40 -40
- udata/templates/mail/frequency_reminder.html +0 -34
- udata/templates/mail/frequency_reminder.txt +0 -18
- udata/tests/test_i18n.py +0 -93
- {udata-11.1.2.dev8.dist-info → udata-11.1.2.dev11.dist-info}/WHEEL +0 -0
- {udata-11.1.2.dev8.dist-info → udata-11.1.2.dev11.dist-info}/entry_points.txt +0 -0
- {udata-11.1.2.dev8.dist-info → udata-11.1.2.dev11.dist-info}/licenses/LICENSE +0 -0
- {udata-11.1.2.dev8.dist-info → udata-11.1.2.dev11.dist-info}/top_level.txt +0 -0
udata/core/dataset/constants.py
CHANGED
|
@@ -1,48 +1,134 @@
|
|
|
1
1
|
from collections import OrderedDict
|
|
2
|
+
from datetime import datetime, timedelta
|
|
3
|
+
from enum import StrEnum, auto
|
|
4
|
+
|
|
5
|
+
from flask_babel import LazyString
|
|
2
6
|
|
|
3
7
|
from udata.i18n import lazy_gettext as _
|
|
4
8
|
|
|
5
|
-
#: Udata frequencies with their labels
|
|
6
|
-
#:
|
|
7
|
-
#: See: http://dublincore.org/groups/collections/frequency/
|
|
8
|
-
UPDATE_FREQUENCIES = OrderedDict(
|
|
9
|
-
[ # Dublin core equivalent
|
|
10
|
-
("unknown", _("Unknown")), # N/A
|
|
11
|
-
("punctual", _("Punctual")), # N/A
|
|
12
|
-
("continuous", _("Real time")), # freq:continuous
|
|
13
|
-
("hourly", _("Hourly")), # N/A
|
|
14
|
-
("fourTimesADay", _("Four times a day")), # N/A
|
|
15
|
-
("threeTimesADay", _("Three times a day")), # N/A
|
|
16
|
-
("semidaily", _("Semidaily")), # N/A
|
|
17
|
-
("daily", _("Daily")), # freq:daily
|
|
18
|
-
("fourTimesAWeek", _("Four times a week")), # N/A
|
|
19
|
-
("threeTimesAWeek", _("Three times a week")), # freq:threeTimesAWeek
|
|
20
|
-
("semiweekly", _("Semiweekly")), # freq:semiweekly
|
|
21
|
-
("weekly", _("Weekly")), # freq:weekly
|
|
22
|
-
("biweekly", _("Biweekly")), # freq:bimonthly
|
|
23
|
-
("threeTimesAMonth", _("Three times a month")), # freq:threeTimesAMonth
|
|
24
|
-
("semimonthly", _("Semimonthly")), # freq:semimonthly
|
|
25
|
-
("monthly", _("Monthly")), # freq:monthly
|
|
26
|
-
("bimonthly", _("Bimonthly")), # freq:bimonthly
|
|
27
|
-
("quarterly", _("Quarterly")), # freq:quarterly
|
|
28
|
-
("threeTimesAYear", _("Three times a year")), # freq:threeTimesAYear
|
|
29
|
-
("semiannual", _("Biannual")), # freq:semiannual
|
|
30
|
-
("annual", _("Annual")), # freq:annual
|
|
31
|
-
("biennial", _("Biennial")), # freq:biennial
|
|
32
|
-
("triennial", _("Triennial")), # freq:triennial
|
|
33
|
-
("quinquennial", _("Quinquennial")), # N/A
|
|
34
|
-
("irregular", _("Irregular")), # freq:irregular
|
|
35
|
-
]
|
|
36
|
-
)
|
|
37
9
|
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
10
|
+
class UpdateFrequency(StrEnum):
|
|
11
|
+
"""
|
|
12
|
+
Udata frequency vocabulary
|
|
13
|
+
|
|
14
|
+
Based on the following vocabularies:
|
|
15
|
+
- DC: http://dublincore.org/groups/collections/frequency/
|
|
16
|
+
- EU: https://publications.europa.eu/en/web/eu-vocabularies/at-dataset/-/resource/dataset/frequency
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
CONTINUOUS = auto(), _("Real time"), None # DC, EU:UPDATE_CONT
|
|
20
|
+
ONE_MINUTE = "oneMinute", _("Every minute"), timedelta(minutes=1) # EU:1MIN
|
|
21
|
+
FIVE_MINUTES = "fiveMinutes", _("Every five minutes"), timedelta(minutes=5) # EU:5MIN
|
|
22
|
+
TEN_MINUTES = "tenMinutes", _("Every ten minutes"), timedelta(minutes=10) # EU:10MIN
|
|
23
|
+
FIFTEEN_MINUTES = (
|
|
24
|
+
"fifteenMinutes",
|
|
25
|
+
_("Every fifteen minutes"),
|
|
26
|
+
timedelta(minutes=15),
|
|
27
|
+
) # EU:15MIN
|
|
28
|
+
THIRTY_MINUTES = "thirtyMinutes", _("Every thirty minute"), timedelta(minutes=30) # EU:30MIN
|
|
29
|
+
HOURLY = auto(), _("Every hour"), timedelta(hours=1) # EU
|
|
30
|
+
BIHOURLY = auto(), _("Every two hours"), timedelta(hours=2) # EU
|
|
31
|
+
TRIHOURLY = auto(), _("Every three hours"), timedelta(hours=3) # EU
|
|
32
|
+
TWELVE_HOURS = "twelveHours", _("Every twelve hours"), timedelta(hours=12) # EU:12HRS
|
|
33
|
+
SEVERAL_TIMES_A_DAY = "severalTimesADay", _("Several times a day"), timedelta(days=1) # EU:CONT
|
|
34
|
+
THREE_TIMES_A_DAY = "threeTimesADay", _("Three times a day"), timedelta(days=1) # EU:DAILY_3
|
|
35
|
+
SEMIDAILY = auto(), _("Twice a day"), timedelta(days=1) # EU:DAILY_2
|
|
36
|
+
DAILY = auto(), _("Daily"), timedelta(days=1) # DC, EU
|
|
37
|
+
FIVE_TIMES_A_WEEK = "fiveTimesAWeek", _("Five times a week"), timedelta(weeks=1) # EU:WEEKLY_5
|
|
38
|
+
THREE_TIMES_A_WEEK = (
|
|
39
|
+
"threeTimesAWeek",
|
|
40
|
+
_("Three times a week"),
|
|
41
|
+
timedelta(weeks=1),
|
|
42
|
+
) # DC, EU:WEEKLY_3
|
|
43
|
+
SEMIWEEKLY = auto(), _("Twice a week"), timedelta(weeks=1) # DC, EU:WEEKLY_2
|
|
44
|
+
WEEKLY = auto(), _("Weekly"), timedelta(weeks=1) # DC, EU
|
|
45
|
+
BIWEEKLY = auto(), _("Every two weeks"), timedelta(weeks=2) # DC, EU
|
|
46
|
+
THREE_TIMES_A_MONTH = (
|
|
47
|
+
"threeTimesAMonth",
|
|
48
|
+
_("Three times a month"),
|
|
49
|
+
timedelta(days=31),
|
|
50
|
+
) # DC, EU:MONTHLY_3
|
|
51
|
+
SEMIMONTHLY = auto(), _("Twice a month"), timedelta(days=31) # DC, EU:MONTHLY_2
|
|
52
|
+
MONTHLY = auto(), _("Monthly"), timedelta(days=31) # DC, EU
|
|
53
|
+
BIMONTHLY = auto(), _("Every two months"), timedelta(days=31 * 2) # DC, EU
|
|
54
|
+
QUARTERLY = auto(), _("Quarterly"), timedelta(days=31 * 3) # DC, EU
|
|
55
|
+
THREE_TIMES_A_YEAR = (
|
|
56
|
+
"threeTimesAYear",
|
|
57
|
+
_("Three times a year"),
|
|
58
|
+
timedelta(days=365),
|
|
59
|
+
) # DC, EU:ANNUAL_3
|
|
60
|
+
SEMIANNUAL = auto(), _("Twice a year"), timedelta(days=365) # DC, EU:ANNUAL_2
|
|
61
|
+
ANNUAL = auto(), _("Annually"), timedelta(days=365) # DC, EU
|
|
62
|
+
BIENNIAL = auto(), _("Every two years"), timedelta(days=365 * 2) # DC, EU
|
|
63
|
+
TRIENNIAL = auto(), _("Every three years"), timedelta(days=365 * 3) # DC, EU
|
|
64
|
+
QUADRENNIAL = auto(), _("Every four years"), timedelta(days=365 * 4) # EU
|
|
65
|
+
QUINQUENNIAL = auto(), _("Every five years"), timedelta(days=365 * 5) # EU
|
|
66
|
+
DECENNIAL = auto(), _("Every ten years"), timedelta(days=365 * 10) # EU
|
|
67
|
+
BIDECENNIAL = auto(), _("Every twenty years"), timedelta(days=365 * 20) # EU
|
|
68
|
+
TRIDECENNIAL = auto(), _("Every thirty years"), timedelta(days=365 * 30) # EU
|
|
69
|
+
PUNCTUAL = auto(), _("Punctual"), None # EU:AS_NEEDED
|
|
70
|
+
IRREGULAR = auto(), _("Irregular"), None # DC, EU:IRREG
|
|
71
|
+
NEVER = auto(), _("Never"), None # EU
|
|
72
|
+
NOT_PLANNED = "notPlanned", _("Not planned"), None # EU:NOT_PLANNED
|
|
73
|
+
OTHER = auto(), _("Other"), None # EU
|
|
74
|
+
UNKNOWN = auto(), _("Unknown"), None # EU
|
|
75
|
+
|
|
76
|
+
def __new__(cls, id: str, label: LazyString, delta: timedelta | None):
|
|
77
|
+
# Set _value_ so the enum value-based lookup depends only on the id field.
|
|
78
|
+
# See https://docs.python.org/3/howto/enum.html#when-to-use-new-vs-init
|
|
79
|
+
obj = str.__new__(cls, id)
|
|
80
|
+
obj._value_ = id
|
|
81
|
+
obj._label = label # type: ignore[misc]
|
|
82
|
+
obj._delta = delta # type: ignore[misc]
|
|
83
|
+
return obj
|
|
84
|
+
|
|
85
|
+
@classmethod
|
|
86
|
+
def _missing_(cls, value) -> "UpdateFrequency | None":
|
|
87
|
+
if isinstance(value, str):
|
|
88
|
+
return UpdateFrequency._LEGACY_FREQUENCIES.get(value) # type: ignore[misc]
|
|
89
|
+
|
|
90
|
+
@property
|
|
91
|
+
def id(self) -> str:
|
|
92
|
+
return self.value
|
|
93
|
+
|
|
94
|
+
@property
|
|
95
|
+
def label(self) -> LazyString:
|
|
96
|
+
return self._label # type: ignore[misc]
|
|
97
|
+
|
|
98
|
+
@property
|
|
99
|
+
def delta(self) -> timedelta | None:
|
|
100
|
+
return self._delta # type: ignore[misc]
|
|
101
|
+
|
|
102
|
+
def next_update(self, last_update: datetime) -> datetime | None:
|
|
103
|
+
return last_update + self.delta if self.delta else None
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
# We must declare UpdateFrequency class variables after the Enum magic
|
|
107
|
+
# happens, so outside of class declaration.
|
|
108
|
+
#
|
|
109
|
+
# The alternative method based on _ignore_ breaks accessing the class
|
|
110
|
+
# variables from outside the class, because accesses to them will go
|
|
111
|
+
# through __getattr__ as if it were an Enum entry.
|
|
112
|
+
#
|
|
113
|
+
# FIXME(python 3.13+): Use Enum._add_value_alias_ instead:
|
|
114
|
+
#
|
|
115
|
+
# UNKNOWN = auto(), _("Unknown"), None, []
|
|
116
|
+
# CONTINUOUS = auto(), _("Real time"), None, ["realtime"]
|
|
117
|
+
# SEVERAL_TIMES_A_DAY = "severalTimesADay", ..., ["fourTimesADay"]
|
|
118
|
+
#
|
|
119
|
+
# def __new__(cls, id: str, ..., aliases: list[str]):
|
|
120
|
+
# ...
|
|
121
|
+
# for alias in aliases:
|
|
122
|
+
# obj._add_value_alias_(alias)
|
|
123
|
+
#
|
|
124
|
+
UpdateFrequency._LEGACY_FREQUENCIES = { # type: ignore[misc]
|
|
125
|
+
"realtime": UpdateFrequency.CONTINUOUS,
|
|
126
|
+
"fourTimesADay": UpdateFrequency.SEVERAL_TIMES_A_DAY,
|
|
127
|
+
"fourTimesAWeek": UpdateFrequency.OTHER,
|
|
128
|
+
"fortnighly": UpdateFrequency.BIWEEKLY,
|
|
129
|
+
"biannual": UpdateFrequency.SEMIANNUAL,
|
|
43
130
|
}
|
|
44
131
|
|
|
45
|
-
DEFAULT_FREQUENCY = "unknown"
|
|
46
132
|
|
|
47
133
|
DEFAULT_LICENSE = {
|
|
48
134
|
"id": "notspecified",
|
udata/core/dataset/factories.py
CHANGED
|
@@ -8,6 +8,7 @@ from udata.core.organization.factories import OrganizationFactory
|
|
|
8
8
|
from udata.core.spatial.factories import SpatialCoverageFactory
|
|
9
9
|
from udata.factories import ModelFactory
|
|
10
10
|
|
|
11
|
+
from .constants import UpdateFrequency
|
|
11
12
|
from .models import Checksum, CommunityResource, Dataset, License, Resource
|
|
12
13
|
|
|
13
14
|
|
|
@@ -17,7 +18,7 @@ class DatasetFactory(ModelFactory):
|
|
|
17
18
|
|
|
18
19
|
title = factory.Faker("sentence")
|
|
19
20
|
description = factory.Faker("text")
|
|
20
|
-
frequency =
|
|
21
|
+
frequency = UpdateFrequency.UNKNOWN
|
|
21
22
|
resources = factory.LazyAttribute(lambda o: ResourceFactory.build_batch(o.nb_resources))
|
|
22
23
|
|
|
23
24
|
class Params:
|
udata/core/dataset/forms.py
CHANGED
|
@@ -6,14 +6,12 @@ from udata.mongo.errors import FieldValidationError
|
|
|
6
6
|
|
|
7
7
|
from .constants import (
|
|
8
8
|
CHECKSUM_TYPES,
|
|
9
|
-
DEFAULT_FREQUENCY,
|
|
10
9
|
DESCRIPTION_SHORT_SIZE_LIMIT,
|
|
11
10
|
DESCRIPTION_SIZE_LIMIT,
|
|
12
|
-
LEGACY_FREQUENCIES,
|
|
13
11
|
RESOURCE_FILETYPES,
|
|
14
12
|
RESOURCE_TYPES,
|
|
15
13
|
TITLE_SIZE_LIMIT,
|
|
16
|
-
|
|
14
|
+
UpdateFrequency,
|
|
17
15
|
)
|
|
18
16
|
from .models import (
|
|
19
17
|
Checksum,
|
|
@@ -117,10 +115,11 @@ class CommunityResourceForm(BaseResourceForm):
|
|
|
117
115
|
organization = fields.PublishAsField(_("Publish as"))
|
|
118
116
|
|
|
119
117
|
|
|
120
|
-
def
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
118
|
+
def unmarshal_frequency(form, field):
|
|
119
|
+
# We don't need to worry about invalid field.data being fed to UpdateFrequency here,
|
|
120
|
+
# since the API will already have ensured incoming data matches the field definition,
|
|
121
|
+
# which in our case is an enum of valid UpdateFrequency values.
|
|
122
|
+
field.data = UpdateFrequency(field.data)
|
|
124
123
|
|
|
125
124
|
|
|
126
125
|
def validate_contact_point(form, field):
|
|
@@ -160,10 +159,15 @@ class DatasetForm(ModelForm):
|
|
|
160
159
|
license = fields.ModelSelectField(_("License"), model=License, allow_blank=True)
|
|
161
160
|
frequency = fields.SelectField(
|
|
162
161
|
_("Update frequency"),
|
|
163
|
-
choices=list(
|
|
164
|
-
default=
|
|
162
|
+
choices=list(UpdateFrequency),
|
|
163
|
+
default=UpdateFrequency.UNKNOWN,
|
|
165
164
|
validators=[validators.optional()],
|
|
166
|
-
|
|
165
|
+
# Unmarshaling should not happen during validation, but flask-restx makes it cumbersome
|
|
166
|
+
# to do it earlier, requiring a request parser (unmarshaler) separate from the marshaler,
|
|
167
|
+
# meaning we can't use the same object for @api.expect and @api.marshal_with.
|
|
168
|
+
# This should get better once flask-restx moves to something like marshmallow, which
|
|
169
|
+
# handles marshaling/unmarshaling more symmetrically and in the same object.
|
|
170
|
+
preprocessors=[unmarshal_frequency],
|
|
167
171
|
description=_("The frequency at which data are updated."),
|
|
168
172
|
)
|
|
169
173
|
frequency_date = fields.DateTimeField(_("Expected frequency date"))
|
udata/core/dataset/models.py
CHANGED
|
@@ -38,7 +38,6 @@ from .constants import (
|
|
|
38
38
|
DESCRIPTION_SHORT_SIZE_LIMIT,
|
|
39
39
|
HVD,
|
|
40
40
|
INSPIRE,
|
|
41
|
-
LEGACY_FREQUENCIES,
|
|
42
41
|
MAX_DISTANCE,
|
|
43
42
|
PIVOTAL_DATA,
|
|
44
43
|
RESOURCE_FILETYPES,
|
|
@@ -47,7 +46,7 @@ from .constants import (
|
|
|
47
46
|
SL,
|
|
48
47
|
SPD,
|
|
49
48
|
SR,
|
|
50
|
-
|
|
49
|
+
UpdateFrequency,
|
|
51
50
|
)
|
|
52
51
|
from .exceptions import (
|
|
53
52
|
SchemasCacheUnavailableException,
|
|
@@ -580,7 +579,8 @@ class Dataset(Auditable, WithMetrics, DatasetBadgeMixin, Owned, Linkable, db.Doc
|
|
|
580
579
|
resources = field(db.ListField(db.EmbeddedDocumentField(Resource)), auditable=False)
|
|
581
580
|
|
|
582
581
|
private = field(db.BooleanField(default=False))
|
|
583
|
-
|
|
582
|
+
|
|
583
|
+
frequency = field(db.EnumField(UpdateFrequency))
|
|
584
584
|
frequency_date = field(db.DateTimeField(verbose_name=_("Future date of update")))
|
|
585
585
|
temporal_coverage = field(db.EmbeddedDocumentField(db.DateRange))
|
|
586
586
|
spatial = field(db.EmbeddedDocumentField(SpatialCoverage))
|
|
@@ -703,8 +703,6 @@ class Dataset(Auditable, WithMetrics, DatasetBadgeMixin, Owned, Linkable, db.Doc
|
|
|
703
703
|
|
|
704
704
|
def clean(self):
|
|
705
705
|
super(Dataset, self).clean()
|
|
706
|
-
if self.frequency in LEGACY_FREQUENCIES:
|
|
707
|
-
self.frequency = LEGACY_FREQUENCIES[self.frequency]
|
|
708
706
|
|
|
709
707
|
if len(set(res.id for res in self.resources)) != len(self.resources):
|
|
710
708
|
raise MongoEngineValidationError(f"Duplicate resource ID in dataset #{self.id}.")
|
|
@@ -782,8 +780,8 @@ class Dataset(Auditable, WithMetrics, DatasetBadgeMixin, Owned, Linkable, db.Doc
|
|
|
782
780
|
return self.owner.avatar.url
|
|
783
781
|
|
|
784
782
|
@property
|
|
785
|
-
def
|
|
786
|
-
return
|
|
783
|
+
def has_frequency(self):
|
|
784
|
+
return self.frequency not in [None, UpdateFrequency.UNKNOWN]
|
|
787
785
|
|
|
788
786
|
def check_availability(self):
|
|
789
787
|
"""Check if resources from that dataset are available.
|
|
@@ -835,33 +833,7 @@ class Dataset(Auditable, WithMetrics, DatasetBadgeMixin, Owned, Linkable, db.Doc
|
|
|
835
833
|
Ex: the next update for a threeTimesAday freq is not
|
|
836
834
|
every 8 hours, but is maximum 24 hours later.
|
|
837
835
|
"""
|
|
838
|
-
|
|
839
|
-
if self.frequency == "hourly":
|
|
840
|
-
delta = timedelta(hours=1)
|
|
841
|
-
elif self.frequency in ["fourTimesADay", "threeTimesADay", "semidaily", "daily"]:
|
|
842
|
-
delta = timedelta(days=1)
|
|
843
|
-
elif self.frequency in ["fourTimesAWeek", "threeTimesAWeek", "semiweekly", "weekly"]:
|
|
844
|
-
delta = timedelta(weeks=1)
|
|
845
|
-
elif self.frequency == "biweekly":
|
|
846
|
-
delta = timedelta(weeks=2)
|
|
847
|
-
elif self.frequency in ["threeTimesAMonth", "semimonthly", "monthly"]:
|
|
848
|
-
delta = timedelta(days=31)
|
|
849
|
-
elif self.frequency == "bimonthly":
|
|
850
|
-
delta = timedelta(days=31 * 2)
|
|
851
|
-
elif self.frequency == "quarterly":
|
|
852
|
-
delta = timedelta(days=365 / 4)
|
|
853
|
-
elif self.frequency in ["threeTimesAYear", "semiannual", "annual"]:
|
|
854
|
-
delta = timedelta(days=365)
|
|
855
|
-
elif self.frequency == "biennial":
|
|
856
|
-
delta = timedelta(days=365 * 2)
|
|
857
|
-
elif self.frequency == "triennial":
|
|
858
|
-
delta = timedelta(days=365 * 3)
|
|
859
|
-
elif self.frequency == "quinquennial":
|
|
860
|
-
delta = timedelta(days=365 * 5)
|
|
861
|
-
if delta is None:
|
|
862
|
-
return
|
|
863
|
-
else:
|
|
864
|
-
return self.last_update + delta
|
|
836
|
+
return self.frequency.next_update(self.last_update) if self.has_frequency else None
|
|
865
837
|
|
|
866
838
|
@property
|
|
867
839
|
def quality(self):
|
|
@@ -880,7 +852,7 @@ class Dataset(Auditable, WithMetrics, DatasetBadgeMixin, Owned, Linkable, db.Doc
|
|
|
880
852
|
# Allow for being one day late on update.
|
|
881
853
|
# We may have up to one day delay due to harvesting for example
|
|
882
854
|
quality["update_fulfilled_in_time"] = (next_update - datetime.utcnow()).days >= -1
|
|
883
|
-
elif self.
|
|
855
|
+
elif self.has_frequency and self.frequency.delta is None:
|
|
884
856
|
# For these frequencies, we don't expect regular updates or can't quantify them.
|
|
885
857
|
# Thus we consider the update_fulfilled_in_time quality criterion to be true.
|
|
886
858
|
quality["update_fulfilled_in_time"] = True
|
|
@@ -905,7 +877,7 @@ class Dataset(Auditable, WithMetrics, DatasetBadgeMixin, Owned, Linkable, db.Doc
|
|
|
905
877
|
result["temporal_coverage"] = True if self.temporal_coverage else False
|
|
906
878
|
result["spatial"] = True if self.spatial else False
|
|
907
879
|
|
|
908
|
-
result["update_frequency"] = self.
|
|
880
|
+
result["update_frequency"] = self.has_frequency
|
|
909
881
|
|
|
910
882
|
# We only save the next_update here because it is based on resources
|
|
911
883
|
# We cannot save the `update_fulfilled_in_time` because it is time
|
udata/core/dataset/rdf.py
CHANGED
|
@@ -52,54 +52,83 @@ from udata.rdf import (
|
|
|
52
52
|
)
|
|
53
53
|
from udata.utils import get_by, safe_unicode, to_naive_datetime
|
|
54
54
|
|
|
55
|
-
from .constants import OGC_SERVICE_FORMATS,
|
|
55
|
+
from .constants import OGC_SERVICE_FORMATS, UpdateFrequency
|
|
56
56
|
from .models import Checksum, Dataset, License, Resource
|
|
57
57
|
|
|
58
58
|
log = logging.getLogger(__name__)
|
|
59
59
|
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
60
|
+
FREQ_TERM_TO_UDATA = {
|
|
61
|
+
FREQ.continuous: UpdateFrequency.CONTINUOUS,
|
|
62
|
+
FREQ.daily: UpdateFrequency.DAILY,
|
|
63
|
+
FREQ.threeTimesAWeek: UpdateFrequency.THREE_TIMES_A_WEEK,
|
|
64
|
+
FREQ.semiweekly: UpdateFrequency.SEMIWEEKLY,
|
|
65
|
+
FREQ.weekly: UpdateFrequency.WEEKLY,
|
|
66
|
+
FREQ.biweekly: UpdateFrequency.BIWEEKLY,
|
|
67
|
+
FREQ.threeTimesAMonth: UpdateFrequency.THREE_TIMES_A_MONTH,
|
|
68
|
+
FREQ.semimonthly: UpdateFrequency.SEMIMONTHLY,
|
|
69
|
+
FREQ.monthly: UpdateFrequency.MONTHLY,
|
|
70
|
+
FREQ.bimonthly: UpdateFrequency.BIMONTHLY,
|
|
71
|
+
FREQ.quarterly: UpdateFrequency.QUARTERLY,
|
|
72
|
+
FREQ.threeTimesAYear: UpdateFrequency.THREE_TIMES_A_YEAR,
|
|
73
|
+
FREQ.semiannual: UpdateFrequency.SEMIANNUAL,
|
|
74
|
+
FREQ.annual: UpdateFrequency.ANNUAL,
|
|
75
|
+
FREQ.biennial: UpdateFrequency.BIENNIAL,
|
|
76
|
+
FREQ.triennial: UpdateFrequency.TRIENNIAL,
|
|
77
|
+
FREQ.irregular: UpdateFrequency.IRREGULAR,
|
|
78
|
+
}
|
|
79
|
+
FREQ_ID_TO_UDATA = {
|
|
80
|
+
namespace_manager.compute_qname(k)[2].lower(): v for k, v in FREQ_TERM_TO_UDATA.items()
|
|
70
81
|
}
|
|
71
82
|
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
EUFREQ
|
|
79
|
-
EUFREQ
|
|
80
|
-
EUFREQ.
|
|
81
|
-
EUFREQ.
|
|
82
|
-
EUFREQ.
|
|
83
|
-
EUFREQ
|
|
84
|
-
EUFREQ.
|
|
85
|
-
EUFREQ.
|
|
86
|
-
EUFREQ.
|
|
87
|
-
|
|
88
|
-
EUFREQ.
|
|
89
|
-
EUFREQ.
|
|
90
|
-
EUFREQ.
|
|
91
|
-
EUFREQ.
|
|
92
|
-
EUFREQ.
|
|
93
|
-
EUFREQ.
|
|
94
|
-
EUFREQ.
|
|
95
|
-
EUFREQ.
|
|
96
|
-
EUFREQ.
|
|
97
|
-
EUFREQ.
|
|
98
|
-
EUFREQ.
|
|
99
|
-
EUFREQ.
|
|
100
|
-
EUFREQ.
|
|
83
|
+
EUFREQ_TERM_TO_UDATA = {
|
|
84
|
+
EUFREQ.UNKNOWN: UpdateFrequency.UNKNOWN,
|
|
85
|
+
EUFREQ.UPDATE_CONT: UpdateFrequency.CONTINUOUS,
|
|
86
|
+
getattr(EUFREQ, "1MIN"): UpdateFrequency.ONE_MINUTE,
|
|
87
|
+
getattr(EUFREQ, "5MIN"): UpdateFrequency.FIVE_MINUTES,
|
|
88
|
+
getattr(EUFREQ, "10MIN"): UpdateFrequency.TEN_MINUTES,
|
|
89
|
+
getattr(EUFREQ, "15MIN"): UpdateFrequency.FIFTEEN_MINUTES,
|
|
90
|
+
getattr(EUFREQ, "30MIN"): UpdateFrequency.THIRTY_MINUTES,
|
|
91
|
+
EUFREQ.HOURLY: UpdateFrequency.HOURLY,
|
|
92
|
+
EUFREQ.BIHOURLY: UpdateFrequency.BIHOURLY,
|
|
93
|
+
EUFREQ.TRIHOURLY: UpdateFrequency.TRIHOURLY,
|
|
94
|
+
getattr(EUFREQ, "12HRS"): UpdateFrequency.TWELVE_HOURS,
|
|
95
|
+
EUFREQ.CONT: UpdateFrequency.SEVERAL_TIMES_A_DAY,
|
|
96
|
+
EUFREQ.DAILY_3: UpdateFrequency.THREE_TIMES_A_DAY,
|
|
97
|
+
EUFREQ.DAILY_2: UpdateFrequency.SEMIDAILY,
|
|
98
|
+
EUFREQ.DAILY: UpdateFrequency.DAILY,
|
|
99
|
+
EUFREQ.WEEKLY_5: UpdateFrequency.FIVE_TIMES_A_WEEK,
|
|
100
|
+
EUFREQ.WEEKLY_3: UpdateFrequency.THREE_TIMES_A_WEEK,
|
|
101
|
+
EUFREQ.WEEKLY_2: UpdateFrequency.SEMIWEEKLY,
|
|
102
|
+
EUFREQ.WEEKLY: UpdateFrequency.WEEKLY,
|
|
103
|
+
EUFREQ.BIWEEKLY: UpdateFrequency.BIWEEKLY,
|
|
104
|
+
EUFREQ.MONTHLY_3: UpdateFrequency.THREE_TIMES_A_MONTH,
|
|
105
|
+
EUFREQ.MONTHLY_2: UpdateFrequency.SEMIMONTHLY,
|
|
106
|
+
EUFREQ.MONTHLY: UpdateFrequency.MONTHLY,
|
|
107
|
+
EUFREQ.BIMONTHLY: UpdateFrequency.BIMONTHLY,
|
|
108
|
+
EUFREQ.QUARTERLY: UpdateFrequency.QUARTERLY,
|
|
109
|
+
EUFREQ.ANNUAL_3: UpdateFrequency.THREE_TIMES_A_YEAR,
|
|
110
|
+
EUFREQ.ANNUAL_2: UpdateFrequency.SEMIANNUAL,
|
|
111
|
+
EUFREQ.ANNUAL: UpdateFrequency.ANNUAL,
|
|
112
|
+
EUFREQ.BIENNIAL: UpdateFrequency.BIENNIAL,
|
|
113
|
+
EUFREQ.TRIENNIAL: UpdateFrequency.TRIENNIAL,
|
|
114
|
+
EUFREQ.QUADRENNIAL: UpdateFrequency.QUADRENNIAL,
|
|
115
|
+
EUFREQ.QUINQUENNIAL: UpdateFrequency.QUINQUENNIAL,
|
|
116
|
+
EUFREQ.DECENNIAL: UpdateFrequency.DECENNIAL,
|
|
117
|
+
EUFREQ.BIDECENNIAL: UpdateFrequency.BIDECENNIAL,
|
|
118
|
+
EUFREQ.TRIDECENNIAL: UpdateFrequency.TRIDECENNIAL,
|
|
119
|
+
EUFREQ.AS_NEEDED: UpdateFrequency.PUNCTUAL,
|
|
120
|
+
EUFREQ.IRREG: UpdateFrequency.IRREGULAR,
|
|
121
|
+
EUFREQ.NEVER: UpdateFrequency.NEVER,
|
|
122
|
+
EUFREQ.NOT_PLANNED: UpdateFrequency.NOT_PLANNED,
|
|
123
|
+
EUFREQ.OTHER: UpdateFrequency.OTHER,
|
|
124
|
+
}
|
|
125
|
+
EUFREQ_ID_TO_UDATA = {
|
|
126
|
+
namespace_manager.compute_qname(k)[2].lower(): v for k, v in EUFREQ_TERM_TO_UDATA.items()
|
|
101
127
|
}
|
|
102
128
|
|
|
129
|
+
# Merge order matters: we want FREQ to win over EUFREQ
|
|
130
|
+
UDATA_FREQ_ID_TO_TERM = {v: k for k, v in {**EUFREQ_TERM_TO_UDATA, **FREQ_TERM_TO_UDATA}.items()}
|
|
131
|
+
|
|
103
132
|
|
|
104
133
|
def temporal_to_rdf(daterange: db.DateRange, graph: Graph | None = None) -> RdfResource | None:
|
|
105
134
|
if not daterange:
|
|
@@ -114,10 +143,8 @@ def temporal_to_rdf(daterange: db.DateRange, graph: Graph | None = None) -> RdfR
|
|
|
114
143
|
return pot
|
|
115
144
|
|
|
116
145
|
|
|
117
|
-
def frequency_to_rdf(frequency:
|
|
118
|
-
|
|
119
|
-
return
|
|
120
|
-
return RDF_FREQUENCIES.get(frequency, getattr(FREQ, frequency))
|
|
146
|
+
def frequency_to_rdf(frequency: UpdateFrequency | None, graph: Graph | None = None) -> str | None:
|
|
147
|
+
return UDATA_FREQ_ID_TO_TERM.get(frequency)
|
|
121
148
|
|
|
122
149
|
|
|
123
150
|
def owner_to_rdf(dataset: Dataset, graph: Graph | None = None) -> RdfResource | None:
|
|
@@ -333,8 +360,7 @@ def dataset_to_rdf(dataset: Dataset, graph: Graph | None = None) -> RdfResource:
|
|
|
333
360
|
if dataset.temporal_coverage:
|
|
334
361
|
d.set(DCT.temporal, temporal_to_rdf(dataset.temporal_coverage, graph))
|
|
335
362
|
|
|
336
|
-
frequency
|
|
337
|
-
if frequency:
|
|
363
|
+
if frequency := frequency_to_rdf(dataset.frequency):
|
|
338
364
|
d.set(DCT.accrualPeriodicity, frequency)
|
|
339
365
|
|
|
340
366
|
owner_role = DCT.publisher
|
|
@@ -510,23 +536,19 @@ def spatial_from_rdf(graph):
|
|
|
510
536
|
return None
|
|
511
537
|
|
|
512
538
|
|
|
513
|
-
def frequency_from_rdf(term):
|
|
539
|
+
def frequency_from_rdf(term) -> UpdateFrequency | None:
|
|
514
540
|
if isinstance(term, str):
|
|
515
541
|
try:
|
|
516
542
|
term = URIRef(uris.validate(term))
|
|
517
543
|
except uris.ValidationError:
|
|
518
544
|
pass
|
|
519
545
|
if isinstance(term, Literal):
|
|
520
|
-
|
|
521
|
-
|
|
546
|
+
term = term.toPython().lower()
|
|
547
|
+
return FREQ_ID_TO_UDATA.get(term) or EUFREQ_ID_TO_UDATA.get(term)
|
|
522
548
|
if isinstance(term, RdfResource):
|
|
523
549
|
term = term.identifier
|
|
524
550
|
if isinstance(term, URIRef):
|
|
525
|
-
|
|
526
|
-
return EU_RDF_REQUENCIES.get(term)
|
|
527
|
-
_, _, freq = namespace_manager.compute_qname(term)
|
|
528
|
-
if freq.lower() in UPDATE_FREQUENCIES:
|
|
529
|
-
return freq.lower()
|
|
551
|
+
return FREQ_TERM_TO_UDATA.get(term) or EUFREQ_TERM_TO_UDATA.get(term)
|
|
530
552
|
|
|
531
553
|
|
|
532
554
|
def mime_from_rdf(resource):
|
udata/core/dataset/tasks.py
CHANGED
|
@@ -1,22 +1,19 @@
|
|
|
1
1
|
import collections
|
|
2
2
|
import os
|
|
3
|
-
from datetime import datetime
|
|
3
|
+
from datetime import datetime
|
|
4
4
|
from tempfile import NamedTemporaryFile
|
|
5
5
|
|
|
6
6
|
from celery.utils.log import get_task_logger
|
|
7
7
|
from flask import current_app
|
|
8
8
|
from mongoengine import ValidationError
|
|
9
9
|
|
|
10
|
-
from udata import mail
|
|
11
10
|
from udata import models as udata_models
|
|
12
11
|
from udata.core import csv, storages
|
|
13
12
|
from udata.core.dataservices.models import Dataservice
|
|
14
13
|
from udata.harvest.models import HarvestJob
|
|
15
|
-
from udata.
|
|
16
|
-
from udata.models import Activity, Discussion, Follow, Organization, TopicElement, Transfer, db
|
|
14
|
+
from udata.models import Activity, Discussion, Follow, TopicElement, Transfer, db
|
|
17
15
|
from udata.tasks import job
|
|
18
16
|
|
|
19
|
-
from .constants import UPDATE_FREQUENCIES
|
|
20
17
|
from .models import Checksum, CommunityResource, Dataset, Resource
|
|
21
18
|
|
|
22
19
|
log = get_task_logger(__name__)
|
|
@@ -75,51 +72,6 @@ def purge_datasets(self):
|
|
|
75
72
|
dataset.delete()
|
|
76
73
|
|
|
77
74
|
|
|
78
|
-
@job("send-frequency-reminder")
|
|
79
|
-
def send_frequency_reminder(self):
|
|
80
|
-
# We exclude irrelevant frequencies.
|
|
81
|
-
frequencies = [
|
|
82
|
-
f
|
|
83
|
-
for f in UPDATE_FREQUENCIES.keys()
|
|
84
|
-
if f not in ("unknown", "realtime", "punctual", "irregular", "continuous")
|
|
85
|
-
]
|
|
86
|
-
now = datetime.utcnow()
|
|
87
|
-
reminded_orgs = {}
|
|
88
|
-
reminded_people = []
|
|
89
|
-
allowed_delay = current_app.config["DELAY_BEFORE_REMINDER_NOTIFICATION"]
|
|
90
|
-
for org in Organization.objects.visible():
|
|
91
|
-
outdated_datasets = []
|
|
92
|
-
for dataset in Dataset.objects.filter(
|
|
93
|
-
frequency__in=frequencies, organization=org
|
|
94
|
-
).visible():
|
|
95
|
-
if dataset.next_update + timedelta(days=allowed_delay) < now:
|
|
96
|
-
dataset.outdated = now - dataset.next_update
|
|
97
|
-
dataset.frequency_str = UPDATE_FREQUENCIES[dataset.frequency]
|
|
98
|
-
outdated_datasets.append(dataset)
|
|
99
|
-
if outdated_datasets:
|
|
100
|
-
reminded_orgs[org] = outdated_datasets
|
|
101
|
-
for reminded_org, datasets in reminded_orgs.items():
|
|
102
|
-
print(
|
|
103
|
-
"{org.name} will be emailed for {datasets_nb} datasets".format(
|
|
104
|
-
org=reminded_org, datasets_nb=len(datasets)
|
|
105
|
-
)
|
|
106
|
-
)
|
|
107
|
-
recipients = [m.user for m in reminded_org.members]
|
|
108
|
-
reminded_people.append(recipients)
|
|
109
|
-
subject = _("You need to update some frequency-based datasets")
|
|
110
|
-
mail.send(subject, recipients, "frequency_reminder", org=reminded_org, datasets=datasets)
|
|
111
|
-
|
|
112
|
-
print("{nb_orgs} orgs concerned".format(nb_orgs=len(reminded_orgs)))
|
|
113
|
-
reminded_people = list(flatten(reminded_people))
|
|
114
|
-
print(
|
|
115
|
-
"{nb_emails} people contacted ({nb_emails_twice} twice)".format(
|
|
116
|
-
nb_emails=len(reminded_people),
|
|
117
|
-
nb_emails_twice=len(reminded_people) - len(set(reminded_people)),
|
|
118
|
-
)
|
|
119
|
-
)
|
|
120
|
-
print("Done")
|
|
121
|
-
|
|
122
|
-
|
|
123
75
|
def get_queryset(model_cls):
|
|
124
76
|
# special case for resources
|
|
125
77
|
if model_cls.__name__ == "Resource":
|
udata/cors.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
|
|
3
|
-
from flask import g, request
|
|
3
|
+
from flask import current_app, g, request
|
|
4
4
|
from werkzeug.datastructures import Headers
|
|
5
5
|
|
|
6
6
|
log = logging.getLogger(__name__)
|
|
@@ -36,10 +36,27 @@ def is_allowed_cors_route():
|
|
|
36
36
|
path: str = request.path.removeprefix(f"/{g.lang_code}")
|
|
37
37
|
else:
|
|
38
38
|
path: str = request.path
|
|
39
|
+
|
|
40
|
+
# Keeps CORS restrictive when `udata` and the frontend are on the same domain
|
|
41
|
+
# (as is the case in data.gouv with cdata/udata).
|
|
42
|
+
if not current_app.config["SECURITY_SPA_ON_SAME_DOMAIN"] and (
|
|
43
|
+
path.startswith("/login")
|
|
44
|
+
or path.startswith("/logout")
|
|
45
|
+
or path.startswith("/reset")
|
|
46
|
+
or path.startswith("/register")
|
|
47
|
+
or path.startswith("/confirm")
|
|
48
|
+
or path.startswith("/change")
|
|
49
|
+
or path.startswith("/change-email")
|
|
50
|
+
or path.startswith("/oauth")
|
|
51
|
+
or path.startswith("/get-csrf")
|
|
52
|
+
):
|
|
53
|
+
return True
|
|
54
|
+
|
|
39
55
|
return (
|
|
40
56
|
path.endswith((".js", ".css", ".woff", ".woff2", ".png", ".jpg", ".jpeg", ".svg"))
|
|
41
57
|
or path.startswith("/api")
|
|
42
|
-
or path.startswith("/oauth")
|
|
58
|
+
or path.startswith("/oauth/token")
|
|
59
|
+
or path.startswith("/oauth/revoke")
|
|
43
60
|
or path.startswith("/datasets/r/")
|
|
44
61
|
)
|
|
45
62
|
|