udata 14.0.0__py3-none-any.whl → 14.5.1.dev6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of udata might be problematic.
- udata/api/__init__.py +2 -0
- udata/api_fields.py +35 -4
- udata/app.py +18 -20
- udata/auth/__init__.py +29 -6
- udata/auth/forms.py +2 -2
- udata/auth/views.py +13 -6
- udata/commands/dcat.py +1 -1
- udata/commands/serve.py +3 -11
- udata/commands/tests/test_fixtures.py +9 -9
- udata/core/access_type/api.py +1 -1
- udata/core/access_type/constants.py +12 -8
- udata/core/activity/api.py +5 -6
- udata/core/badges/tests/test_commands.py +6 -6
- udata/core/csv.py +5 -0
- udata/core/dataservices/api.py +8 -1
- udata/core/dataservices/apiv2.py +2 -5
- udata/core/dataservices/models.py +5 -2
- udata/core/dataservices/rdf.py +2 -1
- udata/core/dataservices/tasks.py +13 -2
- udata/core/dataset/api.py +10 -0
- udata/core/dataset/models.py +6 -6
- udata/core/dataset/permissions.py +31 -0
- udata/core/dataset/rdf.py +8 -2
- udata/core/dataset/tasks.py +23 -7
- udata/core/discussions/api.py +15 -1
- udata/core/discussions/models.py +6 -0
- udata/core/legal/__init__.py +0 -0
- udata/core/legal/mails.py +128 -0
- udata/core/organization/api.py +16 -5
- udata/core/organization/apiv2.py +2 -3
- udata/core/organization/mails.py +1 -1
- udata/core/organization/models.py +15 -2
- udata/core/organization/notifications.py +84 -0
- udata/core/organization/permissions.py +1 -1
- udata/core/organization/tasks.py +3 -0
- udata/core/pages/tests/test_api.py +32 -0
- udata/core/post/api.py +24 -69
- udata/core/post/models.py +84 -16
- udata/core/post/tests/test_api.py +24 -1
- udata/core/reports/api.py +18 -0
- udata/core/reports/models.py +42 -2
- udata/core/reuse/api.py +8 -0
- udata/core/reuse/apiv2.py +2 -5
- udata/core/reuse/models.py +1 -1
- udata/core/reuse/tasks.py +7 -0
- udata/core/spatial/forms.py +2 -2
- udata/core/topic/models.py +8 -2
- udata/core/user/api.py +10 -3
- udata/core/user/models.py +12 -2
- udata/features/notifications/api.py +7 -18
- udata/features/notifications/models.py +56 -0
- udata/features/notifications/tasks.py +25 -0
- udata/flask_mongoengine/engine.py +0 -4
- udata/flask_mongoengine/pagination.py +1 -1
- udata/frontend/markdown.py +2 -1
- udata/harvest/actions.py +21 -1
- udata/harvest/api.py +25 -8
- udata/harvest/backends/base.py +27 -1
- udata/harvest/backends/ckan/harvesters.py +11 -2
- udata/harvest/backends/dcat.py +4 -1
- udata/harvest/commands.py +33 -0
- udata/harvest/filters.py +17 -6
- udata/harvest/models.py +16 -0
- udata/harvest/permissions.py +27 -0
- udata/harvest/tests/ckan/test_ckan_backend.py +33 -0
- udata/harvest/tests/test_actions.py +58 -5
- udata/harvest/tests/test_api.py +276 -122
- udata/harvest/tests/test_base_backend.py +86 -1
- udata/harvest/tests/test_dcat_backend.py +81 -10
- udata/harvest/tests/test_filters.py +6 -0
- udata/i18n.py +1 -4
- udata/mail.py +19 -1
- udata/migrations/2025-10-31-create-membership-request-notifications.py +55 -0
- udata/migrations/2025-12-04-add-uuid-to-discussion-messages.py +28 -0
- udata/mongo/slug_fields.py +1 -1
- udata/rdf.py +58 -10
- udata/routing.py +2 -2
- udata/settings.py +11 -0
- udata/tasks.py +1 -0
- udata/templates/mail/message.html +5 -31
- udata/tests/__init__.py +27 -2
- udata/tests/api/__init__.py +108 -21
- udata/tests/api/test_activities_api.py +36 -0
- udata/tests/api/test_auth_api.py +121 -95
- udata/tests/api/test_base_api.py +7 -4
- udata/tests/api/test_datasets_api.py +50 -19
- udata/tests/api/test_organizations_api.py +192 -197
- udata/tests/api/test_reports_api.py +157 -0
- udata/tests/api/test_reuses_api.py +147 -147
- udata/tests/api/test_security_api.py +12 -12
- udata/tests/api/test_swagger.py +4 -4
- udata/tests/api/test_tags_api.py +8 -8
- udata/tests/api/test_user_api.py +1 -1
- udata/tests/apiv2/test_search.py +30 -0
- udata/tests/apiv2/test_swagger.py +4 -4
- udata/tests/cli/test_cli_base.py +8 -9
- udata/tests/dataservice/test_dataservice_tasks.py +29 -0
- udata/tests/dataset/test_dataset_commands.py +4 -4
- udata/tests/dataset/test_dataset_model.py +66 -26
- udata/tests/dataset/test_dataset_rdf.py +99 -5
- udata/tests/dataset/test_dataset_tasks.py +25 -0
- udata/tests/frontend/test_auth.py +58 -1
- udata/tests/frontend/test_csv.py +0 -3
- udata/tests/helpers.py +31 -27
- udata/tests/organization/test_notifications.py +67 -2
- udata/tests/plugin.py +6 -261
- udata/tests/search/test_search_integration.py +33 -0
- udata/tests/site/test_site_csv_exports.py +22 -10
- udata/tests/test_activity.py +9 -9
- udata/tests/test_api_fields.py +10 -0
- udata/tests/test_dcat_commands.py +2 -2
- udata/tests/test_discussions.py +5 -5
- udata/tests/test_legal_mails.py +359 -0
- udata/tests/test_migrations.py +21 -21
- udata/tests/test_notifications.py +15 -57
- udata/tests/test_notifications_task.py +43 -0
- udata/tests/test_owned.py +81 -1
- udata/tests/test_storages.py +25 -19
- udata/tests/test_topics.py +77 -61
- udata/tests/test_uris.py +33 -0
- udata/tests/workers/test_jobs_commands.py +23 -23
- udata/translations/ar/LC_MESSAGES/udata.mo +0 -0
- udata/translations/ar/LC_MESSAGES/udata.po +187 -108
- udata/translations/de/LC_MESSAGES/udata.mo +0 -0
- udata/translations/de/LC_MESSAGES/udata.po +187 -108
- udata/translations/es/LC_MESSAGES/udata.mo +0 -0
- udata/translations/es/LC_MESSAGES/udata.po +187 -108
- udata/translations/fr/LC_MESSAGES/udata.mo +0 -0
- udata/translations/fr/LC_MESSAGES/udata.po +188 -109
- udata/translations/it/LC_MESSAGES/udata.mo +0 -0
- udata/translations/it/LC_MESSAGES/udata.po +187 -108
- udata/translations/pt/LC_MESSAGES/udata.mo +0 -0
- udata/translations/pt/LC_MESSAGES/udata.po +187 -108
- udata/translations/sr/LC_MESSAGES/udata.mo +0 -0
- udata/translations/sr/LC_MESSAGES/udata.po +187 -108
- udata/translations/udata.pot +215 -106
- udata/uris.py +0 -2
- udata-14.5.1.dev6.dist-info/METADATA +109 -0
- {udata-14.0.0.dist-info → udata-14.5.1.dev6.dist-info}/RECORD +143 -140
- udata/core/post/forms.py +0 -30
- udata/flask_mongoengine/json.py +0 -38
- udata/templates/mail/base.html +0 -105
- udata/templates/mail/base.txt +0 -6
- udata/templates/mail/button.html +0 -3
- udata/templates/mail/layouts/1-column.html +0 -19
- udata/templates/mail/layouts/2-columns.html +0 -20
- udata/templates/mail/layouts/center-panel.html +0 -16
- udata-14.0.0.dist-info/METADATA +0 -132
- {udata-14.0.0.dist-info → udata-14.5.1.dev6.dist-info}/WHEEL +0 -0
- {udata-14.0.0.dist-info → udata-14.5.1.dev6.dist-info}/entry_points.txt +0 -0
- {udata-14.0.0.dist-info → udata-14.5.1.dev6.dist-info}/licenses/LICENSE +0 -0
- {udata-14.0.0.dist-info → udata-14.5.1.dev6.dist-info}/top_level.txt +0 -0
udata/core/spatial/forms.py
CHANGED
@@ -64,8 +64,8 @@ class GeomField(Field):
             self.data = geojson.GeoJSON.to_instance(value)
         except Exception:
             self.data = None
-            log.
-            raise
+            log.warning(f"Unable to parse GeoJSON: {value}")
+            raise validators.ValidationError(self.gettext("Not a valid GeoJSON"))

     def pre_validate(self, form):
         if self.data:
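The change above swaps a bare re-raise for a logged warning plus a proper form validation error. A minimal standalone sketch of that new error path, assuming the `geojson` and `wtforms` libraries already used by this module; the `parse_geom` helper is illustrative and not part of udata:

    import logging

    import geojson
    from wtforms import validators

    log = logging.getLogger(__name__)


    def parse_geom(value):
        """Illustrative helper mirroring GeomField's new error handling."""
        try:
            return geojson.GeoJSON.to_instance(value)
        except Exception:
            # Log the offending payload and surface a form error instead of
            # re-raising the original exception.
            log.warning(f"Unable to parse GeoJSON: {value}")
            raise validators.ValidationError("Not a valid GeoJSON")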
udata/core/topic/models.py
CHANGED
@@ -16,7 +16,10 @@ __all__ = ("Topic", "TopicElement")

 class TopicElement(Auditable, db.Document):
     title = field(db.StringField(required=False))
-    description = field(
+    description = field(
+        db.StringField(required=False),
+        markdown=True,
+    )
     tags = field(db.ListField(db.StringField()))
     extras = field(db.ExtrasField())
     element = field(db.GenericReferenceField(choices=["Dataset", "Reuse", "Dataservice"]))
@@ -63,7 +66,10 @@ class Topic(db.Datetimed, Auditable, Linkable, db.Document, Owned):
         db.SlugField(max_length=255, required=True, populate_from="name", update=True, follow=True),
         auditable=False,
     )
-    description = field(
+    description = field(
+        db.StringField(),
+        markdown=True,
+    )
     tags = field(db.ListField(db.StringField()))
     color = field(db.IntField())
udata/core/user/api.py
CHANGED
@@ -8,6 +8,7 @@ from udata.core.dataset.api_fields import community_resource_fields, dataset_fie
 from udata.core.discussions.actions import discussions_for
 from udata.core.discussions.api import discussion_fields
 from udata.core.followers.api import FollowAPI
+from udata.core.legal.mails import add_send_legal_notice_argument, send_legal_notice_on_deletion
 from udata.core.storages.api import (
     image_parser,
     parse_uploaded_image,
@@ -265,11 +266,14 @@ class UserAvatarAPI(API):
         return {"image": user.avatar}


-delete_parser = api.parser()
+delete_parser = add_send_legal_notice_argument(api.parser())
 delete_parser.add_argument(
     "no_mail",
     type=bool,
-    help=
+    help=(
+        "Do not send the simple deletion notification email. "
+        "Note: automatically set to True when send_legal_notice=True to avoid sending duplicate emails."
+    ),
     location="args",
     default=False,
 )
@@ -321,8 +325,11 @@ class UserAPI(API):
             api.abort(
                 403, "You cannot delete yourself with this API. " + 'Use the "me" API instead.'
             )
+        send_legal_notice_on_deletion(user, args)

-
+        # Skip simple notification if legal notice is sent (to avoid duplicate emails)
+        skip_notification = args["no_mail"] or args["send_legal_notice"]
+        user.mark_as_deleted(notify=not skip_notification, delete_comments=args["delete_comments"])
         return "", 204

udata/core/user/models.py
CHANGED
@@ -18,6 +18,7 @@ from udata.core.discussions.models import Discussion
 from udata.core.linkable import Linkable
 from udata.core.storages import avatars, default_image_basename
 from udata.frontend.markdown import mdstrip
+from udata.i18n import lazy_gettext as _
 from udata.models import Follow, WithMetrics, db
 from udata.uris import cdata_url

@@ -62,7 +63,10 @@ class User(WithMetrics, UserMixin, Linkable, db.Document):
         db.ImageField(fs=avatars, basename=default_image_basename, thumbnails=AVATAR_SIZES)
     )
     website = field(db.URLField())
-    about = field(
+    about = field(
+        db.StringField(),
+        markdown=True,
+    )

     prefered_language = field(db.StringField())

@@ -116,6 +120,8 @@ class User(WithMetrics, UserMixin, Linkable, db.Document):
         "auto_create_index_on_save": True,
     }

+    verbose_name = _("account")
+
     __metrics_keys__ = [
         "datasets",
         "reuses",
@@ -142,7 +148,7 @@ class User(WithMetrics, UserMixin, Linkable, db.Document):
         return self.has_role("admin")

     def self_web_url(self, **kwargs):
-        return cdata_url(f"/users/{self._link_id(**kwargs)}
+        return cdata_url(f"/users/{self._link_id(**kwargs)}", **kwargs)

     def self_api_url(self, **kwargs):
         return url_for(
@@ -297,6 +303,10 @@ class User(WithMetrics, UserMixin, Linkable, db.Document):
             discussion.save()
         Follow.objects(follower=self).delete()
         Follow.objects(following=self).delete()
+        # Remove related notifications
+        from udata.features.notifications.models import Notification
+
+        Notification.objects.with_user_in_details(self).delete()

         from udata.models import ContactPoint

udata/features/notifications/api.py
CHANGED
@@ -1,30 +1,19 @@
-from udata.api import API, api
+from udata.api import API, api
 from udata.auth import current_user

-from .
+from .models import Notification

 notifs = api.namespace("notifications", "Notifications API")

-notifications_fields = api.model(
-    "Notification",
-    {
-        "type": fields.String(description="The notification type", readonly=True),
-        "created_on": fields.ISODateTime(
-            description="The notification creation datetime", readonly=True
-        ),
-        "details": fields.Raw(
-            description="Key-Value details depending on notification type", readonly=True
-        ),
-    },
-)
-

 @notifs.route("/", endpoint="notifications")
 class NotificationsAPI(API):
     @api.secure
-    @api.doc("
-    @api.
+    @api.doc("list_notifications")
+    @api.expect(Notification.__index_parser__)
+    @api.marshal_with(Notification.__page_fields__)
     def get(self):
         """List all current user pending notifications"""
         user = current_user._get_current_object()
-
+        notifications = Notification.objects(user=user)
+        return Notification.apply_pagination(Notification.apply_sort_filters(notifications))
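With the hand-written `notifications_fields` model gone, listing is driven by the parser and page fields generated on the `Notification` document. A hedged client-side sketch of what a paginated, filtered listing request could look like; the base URL, the `handled` filter name and the pagination parameters are assumptions inferred from the generated fields, not confirmed API documentation:

    import requests

    resp = requests.get(
        "https://example-udata-instance.org/api/1/notifications/",  # hypothetical instance
        params={"handled": "false", "page": 1, "page_size": 20},    # assumed filter/pagination args
        headers={"X-API-KEY": "<api-key>"},                         # the endpoint is @api.secure
    )
    resp.raise_for_status()
    page = resp.json()
    for notification in page.get("data", []):
        print(notification.get("created_at"), notification.get("details"))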
udata/features/notifications/models.py
ADDED
@@ -0,0 +1,56 @@
+from flask_restx.inputs import boolean
+from mongoengine import NULLIFY
+
+from udata.api_fields import field, generate_fields
+from udata.core.organization.notifications import MembershipRequestNotificationDetails
+from udata.core.user.api_fields import user_ref_fields
+from udata.core.user.models import User
+from udata.models import db
+from udata.mongo.datetime_fields import Datetimed
+from udata.mongo.queryset import UDataQuerySet
+
+
+class NotificationQuerySet(UDataQuerySet):
+    def with_organization_in_details(self, organization):
+        """This function must be updated to handle new details cases"""
+        return self(details__request_organization=organization)
+
+    def with_user_in_details(self, user):
+        """This function must be updated to handle new details cases"""
+        return self(details__request_user=user)
+
+
+def is_handled(base_query, filter_value):
+    if filter_value is None:
+        return base_query
+    if filter_value is True:
+        return base_query.filter(handled_at__ne=None)
+    return base_query.filter(handled_at=None)
+
+
+@generate_fields()
+class Notification(Datetimed, db.Document):
+    meta = {
+        "ordering": ["-created_at"],
+        "queryset_class": NotificationQuerySet,
+    }
+
+    id = field(db.AutoUUIDField(primary_key=True))
+    handled_at = field(
+        db.DateTimeField(),
+        sortable=True,
+        auditable=False,
+        filterable={"key": "handled", "query": is_handled, "type": boolean},
+    )
+    user = field(
+        db.ReferenceField(User, reverse_delete_rule=NULLIFY),
+        nested_fields=user_ref_fields,
+        readonly=True,
+        allow_null=True,
+        auditable=False,
+        filterable={},
+    )
+    details = field(
+        db.GenericEmbeddedDocumentField(choices=(MembershipRequestNotificationDetails,)),
+        generic=True,
+    )
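A short sketch of how these pieces fit together: the module-level `is_handled` backs the `handled` filter, and the queryset helpers are what user deletion (see udata/core/user/models.py above) calls to purge related notifications. `User.objects.first()` is only a placeholder for "some existing user", and this assumes an application context with a configured database:

    from datetime import datetime

    from udata.core.user.models import User
    from udata.features.notifications.models import Notification, is_handled

    some_user = User.objects.first()  # placeholder: any existing user

    # Unhandled notifications for that user (what the listing endpoint returns):
    pending = is_handled(Notification.objects(user=some_user), False)

    # Marking one as handled simply sets `handled_at`:
    notification = pending.first()
    if notification is not None:
        notification.handled_at = datetime.utcnow()
        notification.save()

    # Cleanup helper now called on user deletion; an organization variant exists too:
    Notification.objects.with_user_in_details(some_user).delete()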
udata/features/notifications/tasks.py
ADDED
@@ -0,0 +1,25 @@
+import logging
+from datetime import datetime, timedelta
+
+from flask import current_app
+
+from udata.features.notifications.models import Notification
+from udata.tasks import job
+
+log = logging.getLogger(__name__)
+
+
+@job("delete-expired-notifications")
+def delete_expired_notifications(self):
+    # Delete expired notifications
+    handled_at = datetime.utcnow() - timedelta(
+        days=current_app.config["DAYS_AFTER_NOTIFICATION_EXPIRED"]
+    )
+    notifications_to_delete = Notification.objects(
+        handled_at__lte=handled_at,
+    )
+    count = notifications_to_delete.count()
+    for notification in notifications_to_delete:
+        notification.delete()
+
+    log.info(f"Deleted {count} expired notifications")
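The job only deletes handled notifications older than a configurable window. A small sketch of the cutoff computation; `DAYS_AFTER_NOTIFICATION_EXPIRED` is the setting name used above (presumably defined in udata/settings.py, which also changes in this release), and the 30-day value here is only an assumed example:

    from datetime import datetime, timedelta

    DAYS_AFTER_NOTIFICATION_EXPIRED = 30  # assumed example value

    cutoff = datetime.utcnow() - timedelta(days=DAYS_AFTER_NOTIFICATION_EXPIRED)
    # The job deletes every Notification whose handled_at is set and <= cutoff;
    # unhandled notifications (handled_at == None) should not match a date-range
    # query in MongoDB, so they are kept.
    print(f"Notifications handled before {cutoff:%Y-%m-%d} would be deleted")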
udata/flask_mongoengine/engine.py
CHANGED
@@ -7,7 +7,6 @@ from mongoengine.errors import DoesNotExist
 from mongoengine.queryset import QuerySet

 from .connection import create_connections
-from .json import override_json_encoder
 from .pagination import ListFieldPagination, Pagination
 from .wtf import WtfBaseField

@@ -108,9 +107,6 @@ class MongoEngine(object):

         app.extensions = getattr(app, "extensions", {})

-        # Make documents JSON serializable
-        override_json_encoder(app)
-
         if "mongoengine" not in app.extensions:
             app.extensions["mongoengine"] = {}

udata/frontend/markdown.py
CHANGED
@@ -7,8 +7,9 @@ import html2text
 import mistune
 from bleach.css_sanitizer import CSSSanitizer
 from bleach.linkifier import LinkifyFilter
-from flask import
+from flask import current_app, request
 from jinja2.filters import do_striptags, do_truncate
+from markupsafe import Markup
 from werkzeug.local import LocalProxy

 from udata.i18n import _
udata/harvest/actions.py
CHANGED
@@ -254,7 +254,7 @@ def schedule(
     source.modify(
         periodic_task=PeriodicTask.objects.create(
             task="harvest",
-            name="Harvest {
+            name=f"Harvest {source.name} ({source.id})",
             description="Periodic Harvesting",
             enabled=True,
             args=[str(source.id)],
@@ -317,3 +317,23 @@ def attach(domain, filename):
             count += 1

     return AttachResult(count, errors)
+
+
+def detach(dataset: Dataset):
+    """Detach a dataset from its harvest source
+
+    The dataset will be cleaned from harvested information
+    and will no longer be updated or archived by harvesting.
+    """
+    dataset.harvest = None
+    for resource in dataset.resources:
+        resource.harvest = None
+    dataset.save()
+
+
+def detach_all_from_source(source: HarvestSource):
+    """Detach all datasets linked to a harvest source"""
+    datasets = Dataset.objects.filter(harvest__source_id=str(source.id))
+    for dataset in datasets:
+        detach(dataset)
+    return len(datasets)
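A hedged sketch of calling the new helpers from a script running inside the application context; the dataset id and source identifier are placeholders, and `actions.get_source` is reused here the same way the CLI in udata/harvest/commands.py below uses it:

    from udata.harvest import actions
    from udata.models import Dataset

    dataset = Dataset.objects.get(id="<dataset-id>")  # placeholder id
    actions.detach(dataset)  # clears dataset.harvest and every resource.harvest

    source = actions.get_source("<source-id-or-slug>")  # placeholder identifier
    count = actions.detach_all_from_source(source)
    print(f"Detached {count} datasets")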
udata/harvest/api.py
CHANGED
@@ -6,7 +6,6 @@ from udata.api import API, api, fields
 from udata.auth import admin_permission
 from udata.core.dataservices.models import Dataservice
 from udata.core.dataset.api_fields import dataset_fields, dataset_ref_fields
-from udata.core.dataset.permissions import OwnablePermission
 from udata.core.organization.api_fields import org_ref_fields
 from udata.core.organization.permissions import EditOrganizationPermission
 from udata.core.user.api_fields import user_ref_fields
@@ -55,6 +54,7 @@ item_fields = api.model(
     "HarvestItem",
     {
         "remote_id": fields.String(description="The item remote ID to process", required=True),
+        "remote_url": fields.String(description="The item remote url (if available)"),
         "dataset": fields.Nested(
             dataset_ref_fields, description="The processed dataset", allow_null=True
         ),
@@ -115,6 +115,18 @@ validation_fields = api.model(
     },
 )

+source_permissions_fields = api.model(
+    "HarvestSourcePermissions",
+    {
+        "edit": fields.Permission(),
+        "delete": fields.Permission(),
+        "run": fields.Permission(),
+        "preview": fields.Permission(),
+        "validate": fields.Permission(),
+        "schedule": fields.Permission(),
+    },
+)
+
 source_fields = api.model(
     "HarvestSource",
     {
@@ -153,6 +165,7 @@ source_fields = api.model(
         "schedule": fields.String(
             description="The source schedule (interval or cron expression)", readonly=True
         ),
+        "permissions": fields.Nested(source_permissions_fields, readonly=True),
     },
 )

@@ -313,7 +326,7 @@ class SourceAPI(API):
     @api.marshal_with(source_fields)
     def put(self, source: HarvestSource):
         """Update a harvest source"""
-
+        source.permissions["edit"].test()
         form = api.validate(HarvestSourceForm, source)
         source = actions.update_source(source, form.data)
         return source
@@ -322,18 +335,19 @@ class SourceAPI(API):
     @api.doc("delete_harvest_source")
     @api.marshal_with(source_fields)
     def delete(self, source: HarvestSource):
-
+        source.permissions["delete"].test()
         return actions.delete_source(source), 204


 @ns.route("/source/<harvest_source:source>/validate/", endpoint="validate_harvest_source")
 class ValidateSourceAPI(API):
     @api.doc("validate_harvest_source")
-    @api.secure
+    @api.secure
     @api.expect(validation_fields)
     @api.marshal_with(source_fields)
     def post(self, source: HarvestSource):
         """Validate or reject an harvest source"""
+        source.permissions["validate"].test()
         form = api.validate(HarvestSourceValidationForm)
         if form.state.data == VALIDATION_ACCEPTED:
             return actions.validate_source(source, form.comment.data)
@@ -354,7 +368,7 @@ class RunSourceAPI(API):
                 "Cannot run source manually. Please contact the platform if you need to reschedule the harvester.",
             )

-
+        source.permissions["run"].test()

         if source.validation.state != VALIDATION_ACCEPTED:
             api.abort(400, "Source is not validated. Please validate the source before running.")
@@ -367,11 +381,12 @@ class RunSourceAPI(API):
 @ns.route("/source/<harvest_source:source>/schedule/", endpoint="schedule_harvest_source")
 class ScheduleSourceAPI(API):
     @api.doc("schedule_harvest_source")
-    @api.secure
+    @api.secure
     @api.expect((str, "A cron expression"))
     @api.marshal_with(source_fields)
     def post(self, source: HarvestSource):
         """Schedule an harvest source"""
+        source.permissions["schedule"].test()
         # Handle both syntax: quoted and unquoted
         try:
             data = request.json
@@ -380,10 +395,11 @@ class ScheduleSourceAPI(API):
             return actions.schedule(source, data)

     @api.doc("unschedule_harvest_source")
-    @api.secure
+    @api.secure
     @api.marshal_with(source_fields)
     def delete(self, source: HarvestSource):
         """Unschedule an harvest source"""
+        source.permissions["schedule"].test()
         return actions.unschedule(source), 204


@@ -408,6 +424,7 @@ class PreviewSourceAPI(API):
     @api.marshal_with(preview_job_fields)
     def get(self, source: HarvestSource):
         """Preview a single harvest source given an ID or a slug"""
+        source.permissions["preview"].test()
         return actions.preview(source)


@@ -437,7 +454,7 @@ class JobAPI(API):
     @api.expect(parser)
     @api.marshal_with(job_fields)
     def get(self, ident):
-        """
+        """Get a single job given an ID"""
         return actions.get_job(ident)

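Write operations on a source now share one guard pattern: look up the relevant entry in `source.permissions` and call `.test()`, which refuses the request when the current user lacks that permission. The mapping itself most likely comes from the new udata/harvest/permissions.py listed above; the sketch below only restates the pattern, and the `.get()`/fallback behaviour is an assumption:

    from flask import abort


    def check_source_permission(source, action: str) -> None:
        """Sketch of the guard applied by the endpoints above.

        `source.permissions` is assumed to be a dict-like mapping of action
        names ("edit", "delete", "run", "preview", "validate", "schedule")
        to permission objects whose `.test()` raises when not allowed.
        """
        permission = source.permissions.get(action)
        if permission is None:
            abort(403)  # assumption: unknown actions are simply refused
        permission.test()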
udata/harvest/backends/base.py
CHANGED
@@ -166,6 +166,7 @@ class BaseBackend(object):
         log.debug(f"Starting harvesting {self.source.name} ({self.source.url})…")
         factory = HarvestJob if self.dryrun else HarvestJob.objects.create
         self.job = factory(status="initialized", started=datetime.utcnow(), source=self.source)
+        self.remote_ids = set()

         before_harvest_job.send(self)
         # Set harvest_activity_user on global context during the run
@@ -190,6 +191,7 @@ class BaseBackend(object):

             if any(i.status == "failed" for i in self.job.items):
                 self.job.status += "-errors"
+
         except HarvestValidationError as e:
             log.exception(
                 f'Harvesting validation failed for "{safe_unicode(self.source.name)}" ({self.source.backend})'
@@ -199,6 +201,15 @@ class BaseBackend(object):

             error = HarvestError(message=safe_unicode(e))
             self.job.errors.append(error)
+        except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as e:
+            log.warning(
+                f'Harvesting connection error for "{safe_unicode(self.source.name)}" ({self.source.backend}): {e}'
+            )
+
+            self.job.status = "failed"
+
+            error = HarvestError(message=safe_unicode(e), details=traceback.format_exc())
+            self.job.errors.append(error)
         except Exception as e:
             log.exception(
                 f'Harvesting failed for "{safe_unicode(self.source.name)}" ({self.source.backend})'
@@ -232,8 +243,13 @@ class BaseBackend(object):

             current_app.logger.addHandler(log_catcher)
             dataset = self.inner_process_dataset(item, **kwargs)
+            if dataset.harvest:
+                item.remote_url = dataset.harvest.remote_url
+
+            # Use `item.remote_id` from this point, because `inner_process_dataset` could have modified it.
+
+            self.ensure_unique_remote_id(item)

-            # Use `item.remote_id` because `inner_process_dataset` could have modified it.
             dataset.harvest = self.update_dataset_harvest_info(dataset.harvest, item.remote_id)
             dataset.archived = None

@@ -291,6 +307,10 @@ class BaseBackend(object):
             raise HarvestSkipException("missing identifier")

         dataservice = self.inner_process_dataservice(item, **kwargs)
+        if dataservice.harvest:
+            item.remote_url = dataservice.harvest.remote_url
+
+        self.ensure_unique_remote_id(item)

         dataservice.harvest = self.update_dataservice_harvest_info(
             dataservice.harvest, remote_id
@@ -325,6 +345,12 @@ class BaseBackend(object):
         item.ended = datetime.utcnow()
         self.save_job()

+    def ensure_unique_remote_id(self, item):
+        if item.remote_id in self.remote_ids:
+            raise HarvestValidationError(f"Identifier '{item.remote_id}' already exists")
+
+        self.remote_ids.add(item.remote_id)
+
     def update_dataset_harvest_info(self, harvest: HarvestDatasetMetadata | None, remote_id: int):
         if not harvest:
             harvest = HarvestDatasetMetadata()
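The backend now records every `remote_id` seen during a job and rejects duplicates with a validation error, so two remote items can no longer silently overwrite each other. A standalone sketch of that guard; the exception class is redefined here only to keep the snippet self-contained and stands in for udata's `HarvestValidationError`:

    class HarvestValidationError(Exception):
        """Stand-in for the harvest validation error used by the backend."""


    def ensure_unique_remote_id(seen_ids: set, remote_id: str) -> None:
        # Same check as BaseBackend.ensure_unique_remote_id above.
        if remote_id in seen_ids:
            raise HarvestValidationError(f"Identifier '{remote_id}' already exists")
        seen_ids.add(remote_id)


    seen: set = set()
    ensure_unique_remote_id(seen, "dataset-1")
    try:
        ensure_unique_remote_id(seen, "dataset-1")
    except HarvestValidationError as error:
        print(error)  # Identifier 'dataset-1' already exists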
udata/harvest/backends/ckan/harvesters.py
CHANGED
@@ -173,7 +173,10 @@ class CkanBackend(BaseBackend):
                 continue
             elif key == "spatial":
                 # GeoJSON representation (Polygon or Point)
-
+                if isinstance(value, dict):
+                    spatial_geom = value
+                else:
+                    spatial_geom = json.loads(value)
             elif key == "spatial-text":
                 # Textual representation of the extent / location
                 qs = GeoZone.objects(db.Q(name=value) | db.Q(slug=value))
@@ -213,12 +216,17 @@ class CkanBackend(BaseBackend):
             dataset.spatial.zones = [spatial_zone]

         if spatial_geom:
+            if "type" not in spatial_geom:
+                raise HarvestException(f"Spatial geometry {spatial_geom} without `type`")
+
             if spatial_geom["type"] == "Polygon":
                 coordinates = [spatial_geom["coordinates"]]
             elif spatial_geom["type"] == "MultiPolygon":
                 coordinates = spatial_geom["coordinates"]
             else:
-                raise HarvestException(
+                raise HarvestException(
+                    f"Unsupported spatial geometry {spatial_geom['type']} in {spatial_geom}. (Supported types are `Polygon` and `MultiPolygon`)"
+                )
             dataset.spatial.geom = {"type": "MultiPolygon", "coordinates": coordinates}

         if temporal_start and temporal_end:
@@ -267,5 +275,6 @@ class CkanBackend(BaseBackend):

 class DkanBackend(CkanBackend):
     name = "dkan"
+    display_name = "DKAN"
     schema = dkan_schema
     filters = []
udata/harvest/backends/dcat.py
CHANGED
@@ -225,7 +225,9 @@ class DcatBackend(BaseBackend):

         dataset = self.get_dataset(item.remote_id)
         remote_url_prefix = self.get_extra_config_value("remote_url_prefix")
-        dataset = dataset_from_rdf(
+        dataset = dataset_from_rdf(
+            page, dataset, node=node, remote_url_prefix=remote_url_prefix, dryrun=self.dryrun
+        )
         if dataset.organization:
             dataset.organization.compute_aggregate_metrics = False
             self.organizations_to_update.add(dataset.organization)
@@ -242,6 +244,7 @@ class DcatBackend(BaseBackend):
             node,
             [item.dataset for item in self.job.items],
             remote_url_prefix=remote_url_prefix,
+            dryrun=self.dryrun,
         )

     def get_node_from_item(self, graph, item):
udata/harvest/commands.py
CHANGED
@@ -4,6 +4,7 @@ import click

 from udata.commands import KO, OK, cli, green, red
 from udata.harvest.backends import get_all_backends, is_backend_enabled
+from udata.models import Dataset

 from . import actions

@@ -156,3 +157,35 @@ def attach(domain, filename):
     log.info("Attaching datasets for domain %s", domain)
     result = actions.attach(domain, filename)
     log.info("Attached %s datasets to %s", result.success, domain)
+
+
+@grp.command()
+@click.argument("dataset_id")
+def detach(dataset_id):
+    """
+    Detach a dataset_id from its harvest source
+
+    The dataset will be cleaned from harvested information
+    """
+    log.info(f"Detaching dataset {dataset_id}")
+    dataset = Dataset.get(dataset_id)
+    actions.detach(dataset)
+    log.info("Done")
+
+
+@grp.command()
+@click.argument("identifier")
+def detach_all_from_source(identifier):
+    """
+    Detach all datasets from a harvest source
+
+    All the datasets will be cleaned from harvested information.
+    Make sure the harvest source won't create new duplicate datasets,
+    either by deactivating it or filtering its scope, etc.
+    """
+    log.info(f"Detaching datasets from harvest source {identifier}")
+    count = actions.detach_all_from_source(actions.get_source(identifier))
+    log.info(f"Detached {count} datasets")
+    log.warning(
+        "Make sure the harvest source won't create new duplicate datasets, either by deactivating it or filtering its scope, etc."
+    )
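A hedged example of driving these commands from a script; the `udata harvest …` invocations mirror the click definitions above, but the exact command names (dashes vs underscores) and the entry point depend on how the CLI group registers them, so treat the argument lists as assumptions and the ids as placeholders:

    import subprocess

    # Detach a single dataset from its harvest source:
    subprocess.run(["udata", "harvest", "detach", "<dataset-id>"], check=True)

    # Detach every dataset attached to a source:
    subprocess.run(
        ["udata", "harvest", "detach-all-from-source", "<source-id-or-slug>"],
        check=True,
    )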
udata/harvest/filters.py
CHANGED
@@ -3,6 +3,9 @@ from voluptuous import Invalid

 from udata import tags, uris

+TRUTHY_STRINGS = ("on", "t", "true", "y", "yes", "1")
+FALSY_STRINGS = ("f", "false", "n", "no", "off", "0")
+

 def boolean(value):
     """
@@ -15,17 +18,25 @@ def boolean(value):
     if value is None or isinstance(value, bool):
         return value

-
-    return bool(
-
+    if isinstance(value, int):
+        return bool(value)
+
+    if isinstance(value, str):
         lower_value = value.strip().lower()
+
         if not lower_value:
             return None
-        if lower_value in
+        if lower_value in FALSY_STRINGS:
             return False
-        if lower_value in
+        if lower_value in TRUTHY_STRINGS:
             return True
-    raise Invalid(
+        raise Invalid(
+            f"Unable to parse string '{value}' as boolean. Supported values are {','.join(TRUTHY_STRINGS)} for `True` and {','.join(FALSY_STRINGS)} for `False`."
+        )
+
+    raise Invalid(
+        f"Cannot convert value {value} of type {type(value)} to boolean. Supported types are `bool`, `int` and `str`"
+    )


 def to_date(value):