udata 14.0.0__py3-none-any.whl → 14.5.1.dev6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of udata might be problematic. Click here for more details.

Files changed (152)
  1. udata/api/__init__.py +2 -0
  2. udata/api_fields.py +35 -4
  3. udata/app.py +18 -20
  4. udata/auth/__init__.py +29 -6
  5. udata/auth/forms.py +2 -2
  6. udata/auth/views.py +13 -6
  7. udata/commands/dcat.py +1 -1
  8. udata/commands/serve.py +3 -11
  9. udata/commands/tests/test_fixtures.py +9 -9
  10. udata/core/access_type/api.py +1 -1
  11. udata/core/access_type/constants.py +12 -8
  12. udata/core/activity/api.py +5 -6
  13. udata/core/badges/tests/test_commands.py +6 -6
  14. udata/core/csv.py +5 -0
  15. udata/core/dataservices/api.py +8 -1
  16. udata/core/dataservices/apiv2.py +2 -5
  17. udata/core/dataservices/models.py +5 -2
  18. udata/core/dataservices/rdf.py +2 -1
  19. udata/core/dataservices/tasks.py +13 -2
  20. udata/core/dataset/api.py +10 -0
  21. udata/core/dataset/models.py +6 -6
  22. udata/core/dataset/permissions.py +31 -0
  23. udata/core/dataset/rdf.py +8 -2
  24. udata/core/dataset/tasks.py +23 -7
  25. udata/core/discussions/api.py +15 -1
  26. udata/core/discussions/models.py +6 -0
  27. udata/core/legal/__init__.py +0 -0
  28. udata/core/legal/mails.py +128 -0
  29. udata/core/organization/api.py +16 -5
  30. udata/core/organization/apiv2.py +2 -3
  31. udata/core/organization/mails.py +1 -1
  32. udata/core/organization/models.py +15 -2
  33. udata/core/organization/notifications.py +84 -0
  34. udata/core/organization/permissions.py +1 -1
  35. udata/core/organization/tasks.py +3 -0
  36. udata/core/pages/tests/test_api.py +32 -0
  37. udata/core/post/api.py +24 -69
  38. udata/core/post/models.py +84 -16
  39. udata/core/post/tests/test_api.py +24 -1
  40. udata/core/reports/api.py +18 -0
  41. udata/core/reports/models.py +42 -2
  42. udata/core/reuse/api.py +8 -0
  43. udata/core/reuse/apiv2.py +2 -5
  44. udata/core/reuse/models.py +1 -1
  45. udata/core/reuse/tasks.py +7 -0
  46. udata/core/spatial/forms.py +2 -2
  47. udata/core/topic/models.py +8 -2
  48. udata/core/user/api.py +10 -3
  49. udata/core/user/models.py +12 -2
  50. udata/features/notifications/api.py +7 -18
  51. udata/features/notifications/models.py +56 -0
  52. udata/features/notifications/tasks.py +25 -0
  53. udata/flask_mongoengine/engine.py +0 -4
  54. udata/flask_mongoengine/pagination.py +1 -1
  55. udata/frontend/markdown.py +2 -1
  56. udata/harvest/actions.py +21 -1
  57. udata/harvest/api.py +25 -8
  58. udata/harvest/backends/base.py +27 -1
  59. udata/harvest/backends/ckan/harvesters.py +11 -2
  60. udata/harvest/backends/dcat.py +4 -1
  61. udata/harvest/commands.py +33 -0
  62. udata/harvest/filters.py +17 -6
  63. udata/harvest/models.py +16 -0
  64. udata/harvest/permissions.py +27 -0
  65. udata/harvest/tests/ckan/test_ckan_backend.py +33 -0
  66. udata/harvest/tests/test_actions.py +58 -5
  67. udata/harvest/tests/test_api.py +276 -122
  68. udata/harvest/tests/test_base_backend.py +86 -1
  69. udata/harvest/tests/test_dcat_backend.py +81 -10
  70. udata/harvest/tests/test_filters.py +6 -0
  71. udata/i18n.py +1 -4
  72. udata/mail.py +19 -1
  73. udata/migrations/2025-10-31-create-membership-request-notifications.py +55 -0
  74. udata/migrations/2025-12-04-add-uuid-to-discussion-messages.py +28 -0
  75. udata/mongo/slug_fields.py +1 -1
  76. udata/rdf.py +58 -10
  77. udata/routing.py +2 -2
  78. udata/settings.py +11 -0
  79. udata/tasks.py +1 -0
  80. udata/templates/mail/message.html +5 -31
  81. udata/tests/__init__.py +27 -2
  82. udata/tests/api/__init__.py +108 -21
  83. udata/tests/api/test_activities_api.py +36 -0
  84. udata/tests/api/test_auth_api.py +121 -95
  85. udata/tests/api/test_base_api.py +7 -4
  86. udata/tests/api/test_datasets_api.py +50 -19
  87. udata/tests/api/test_organizations_api.py +192 -197
  88. udata/tests/api/test_reports_api.py +157 -0
  89. udata/tests/api/test_reuses_api.py +147 -147
  90. udata/tests/api/test_security_api.py +12 -12
  91. udata/tests/api/test_swagger.py +4 -4
  92. udata/tests/api/test_tags_api.py +8 -8
  93. udata/tests/api/test_user_api.py +1 -1
  94. udata/tests/apiv2/test_search.py +30 -0
  95. udata/tests/apiv2/test_swagger.py +4 -4
  96. udata/tests/cli/test_cli_base.py +8 -9
  97. udata/tests/dataservice/test_dataservice_tasks.py +29 -0
  98. udata/tests/dataset/test_dataset_commands.py +4 -4
  99. udata/tests/dataset/test_dataset_model.py +66 -26
  100. udata/tests/dataset/test_dataset_rdf.py +99 -5
  101. udata/tests/dataset/test_dataset_tasks.py +25 -0
  102. udata/tests/frontend/test_auth.py +58 -1
  103. udata/tests/frontend/test_csv.py +0 -3
  104. udata/tests/helpers.py +31 -27
  105. udata/tests/organization/test_notifications.py +67 -2
  106. udata/tests/plugin.py +6 -261
  107. udata/tests/search/test_search_integration.py +33 -0
  108. udata/tests/site/test_site_csv_exports.py +22 -10
  109. udata/tests/test_activity.py +9 -9
  110. udata/tests/test_api_fields.py +10 -0
  111. udata/tests/test_dcat_commands.py +2 -2
  112. udata/tests/test_discussions.py +5 -5
  113. udata/tests/test_legal_mails.py +359 -0
  114. udata/tests/test_migrations.py +21 -21
  115. udata/tests/test_notifications.py +15 -57
  116. udata/tests/test_notifications_task.py +43 -0
  117. udata/tests/test_owned.py +81 -1
  118. udata/tests/test_storages.py +25 -19
  119. udata/tests/test_topics.py +77 -61
  120. udata/tests/test_uris.py +33 -0
  121. udata/tests/workers/test_jobs_commands.py +23 -23
  122. udata/translations/ar/LC_MESSAGES/udata.mo +0 -0
  123. udata/translations/ar/LC_MESSAGES/udata.po +187 -108
  124. udata/translations/de/LC_MESSAGES/udata.mo +0 -0
  125. udata/translations/de/LC_MESSAGES/udata.po +187 -108
  126. udata/translations/es/LC_MESSAGES/udata.mo +0 -0
  127. udata/translations/es/LC_MESSAGES/udata.po +187 -108
  128. udata/translations/fr/LC_MESSAGES/udata.mo +0 -0
  129. udata/translations/fr/LC_MESSAGES/udata.po +188 -109
  130. udata/translations/it/LC_MESSAGES/udata.mo +0 -0
  131. udata/translations/it/LC_MESSAGES/udata.po +187 -108
  132. udata/translations/pt/LC_MESSAGES/udata.mo +0 -0
  133. udata/translations/pt/LC_MESSAGES/udata.po +187 -108
  134. udata/translations/sr/LC_MESSAGES/udata.mo +0 -0
  135. udata/translations/sr/LC_MESSAGES/udata.po +187 -108
  136. udata/translations/udata.pot +215 -106
  137. udata/uris.py +0 -2
  138. udata-14.5.1.dev6.dist-info/METADATA +109 -0
  139. {udata-14.0.0.dist-info → udata-14.5.1.dev6.dist-info}/RECORD +143 -140
  140. udata/core/post/forms.py +0 -30
  141. udata/flask_mongoengine/json.py +0 -38
  142. udata/templates/mail/base.html +0 -105
  143. udata/templates/mail/base.txt +0 -6
  144. udata/templates/mail/button.html +0 -3
  145. udata/templates/mail/layouts/1-column.html +0 -19
  146. udata/templates/mail/layouts/2-columns.html +0 -20
  147. udata/templates/mail/layouts/center-panel.html +0 -16
  148. udata-14.0.0.dist-info/METADATA +0 -132
  149. {udata-14.0.0.dist-info → udata-14.5.1.dev6.dist-info}/WHEEL +0 -0
  150. {udata-14.0.0.dist-info → udata-14.5.1.dev6.dist-info}/entry_points.txt +0 -0
  151. {udata-14.0.0.dist-info → udata-14.5.1.dev6.dist-info}/licenses/LICENSE +0 -0
  152. {udata-14.0.0.dist-info → udata-14.5.1.dev6.dist-info}/top_level.txt +0 -0
udata/core/spatial/forms.py CHANGED
@@ -64,8 +64,8 @@ class GeomField(Field):
64
64
  self.data = geojson.GeoJSON.to_instance(value)
65
65
  except Exception:
66
66
  self.data = None
67
- log.exception("Unable to parse GeoJSON")
68
- raise ValueError(self.gettext("Not a valid GeoJSON"))
67
+ log.warning(f"Unable to parse GeoJSON: {value}")
68
+ raise validators.ValidationError(self.gettext("Not a valid GeoJSON"))
69
69
 
70
70
  def pre_validate(self, form):
71
71
  if self.data:
udata/core/topic/models.py CHANGED
@@ -16,7 +16,10 @@ __all__ = ("Topic", "TopicElement")
16
16
 
17
17
  class TopicElement(Auditable, db.Document):
18
18
  title = field(db.StringField(required=False))
19
- description = field(db.StringField(required=False))
19
+ description = field(
20
+ db.StringField(required=False),
21
+ markdown=True,
22
+ )
20
23
  tags = field(db.ListField(db.StringField()))
21
24
  extras = field(db.ExtrasField())
22
25
  element = field(db.GenericReferenceField(choices=["Dataset", "Reuse", "Dataservice"]))
@@ -63,7 +66,10 @@ class Topic(db.Datetimed, Auditable, Linkable, db.Document, Owned):
63
66
  db.SlugField(max_length=255, required=True, populate_from="name", update=True, follow=True),
64
67
  auditable=False,
65
68
  )
66
- description = field(db.StringField())
69
+ description = field(
70
+ db.StringField(),
71
+ markdown=True,
72
+ )
67
73
  tags = field(db.ListField(db.StringField()))
68
74
  color = field(db.IntField())
69
75
 
udata/core/user/api.py CHANGED
@@ -8,6 +8,7 @@ from udata.core.dataset.api_fields import community_resource_fields, dataset_fie
8
8
  from udata.core.discussions.actions import discussions_for
9
9
  from udata.core.discussions.api import discussion_fields
10
10
  from udata.core.followers.api import FollowAPI
11
+ from udata.core.legal.mails import add_send_legal_notice_argument, send_legal_notice_on_deletion
11
12
  from udata.core.storages.api import (
12
13
  image_parser,
13
14
  parse_uploaded_image,
@@ -265,11 +266,14 @@ class UserAvatarAPI(API):
265
266
  return {"image": user.avatar}
266
267
 
267
268
 
268
- delete_parser = api.parser()
269
+ delete_parser = add_send_legal_notice_argument(api.parser())
269
270
  delete_parser.add_argument(
270
271
  "no_mail",
271
272
  type=bool,
272
- help="Do not send a mail to notify the user of the deletion",
273
+ help=(
274
+ "Do not send the simple deletion notification email. "
275
+ "Note: automatically set to True when send_legal_notice=True to avoid sending duplicate emails."
276
+ ),
273
277
  location="args",
274
278
  default=False,
275
279
  )
@@ -321,8 +325,11 @@ class UserAPI(API):
321
325
  api.abort(
322
326
  403, "You cannot delete yourself with this API. " + 'Use the "me" API instead.'
323
327
  )
328
+ send_legal_notice_on_deletion(user, args)
324
329
 
325
- user.mark_as_deleted(notify=not args["no_mail"], delete_comments=args["delete_comments"])
330
+ # Skip simple notification if legal notice is sent (to avoid duplicate emails)
331
+ skip_notification = args["no_mail"] or args["send_legal_notice"]
332
+ user.mark_as_deleted(notify=not skip_notification, delete_comments=args["delete_comments"])
326
333
  return "", 204
327
334
 
328
335
 
udata/core/user/models.py CHANGED
@@ -18,6 +18,7 @@ from udata.core.discussions.models import Discussion
18
18
  from udata.core.linkable import Linkable
19
19
  from udata.core.storages import avatars, default_image_basename
20
20
  from udata.frontend.markdown import mdstrip
21
+ from udata.i18n import lazy_gettext as _
21
22
  from udata.models import Follow, WithMetrics, db
22
23
  from udata.uris import cdata_url
23
24
 
@@ -62,7 +63,10 @@ class User(WithMetrics, UserMixin, Linkable, db.Document):
62
63
  db.ImageField(fs=avatars, basename=default_image_basename, thumbnails=AVATAR_SIZES)
63
64
  )
64
65
  website = field(db.URLField())
65
- about = field(db.StringField())
66
+ about = field(
67
+ db.StringField(),
68
+ markdown=True,
69
+ )
66
70
 
67
71
  prefered_language = field(db.StringField())
68
72
 
@@ -116,6 +120,8 @@ class User(WithMetrics, UserMixin, Linkable, db.Document):
116
120
  "auto_create_index_on_save": True,
117
121
  }
118
122
 
123
+ verbose_name = _("account")
124
+
119
125
  __metrics_keys__ = [
120
126
  "datasets",
121
127
  "reuses",
@@ -142,7 +148,7 @@ class User(WithMetrics, UserMixin, Linkable, db.Document):
142
148
  return self.has_role("admin")
143
149
 
144
150
  def self_web_url(self, **kwargs):
145
- return cdata_url(f"/users/{self._link_id(**kwargs)}/", **kwargs)
151
+ return cdata_url(f"/users/{self._link_id(**kwargs)}", **kwargs)
146
152
 
147
153
  def self_api_url(self, **kwargs):
148
154
  return url_for(
@@ -297,6 +303,10 @@ class User(WithMetrics, UserMixin, Linkable, db.Document):
297
303
  discussion.save()
298
304
  Follow.objects(follower=self).delete()
299
305
  Follow.objects(following=self).delete()
306
+ # Remove related notifications
307
+ from udata.features.notifications.models import Notification
308
+
309
+ Notification.objects.with_user_in_details(self).delete()
300
310
 
301
311
  from udata.models import ContactPoint
302
312
 
udata/features/notifications/api.py CHANGED
@@ -1,30 +1,19 @@
1
- from udata.api import API, api, fields
1
+ from udata.api import API, api
2
2
  from udata.auth import current_user
3
3
 
4
- from .actions import get_notifications
4
+ from .models import Notification
5
5
 
6
6
  notifs = api.namespace("notifications", "Notifications API")
7
7
 
8
- notifications_fields = api.model(
9
- "Notification",
10
- {
11
- "type": fields.String(description="The notification type", readonly=True),
12
- "created_on": fields.ISODateTime(
13
- description="The notification creation datetime", readonly=True
14
- ),
15
- "details": fields.Raw(
16
- description="Key-Value details depending on notification type", readonly=True
17
- ),
18
- },
19
- )
20
-
21
8
 
22
9
  @notifs.route("/", endpoint="notifications")
23
10
  class NotificationsAPI(API):
24
11
  @api.secure
25
- @api.doc("get_notifications")
26
- @api.marshal_list_with(notifications_fields)
12
+ @api.doc("list_notifications")
13
+ @api.expect(Notification.__index_parser__)
14
+ @api.marshal_with(Notification.__page_fields__)
27
15
  def get(self):
28
16
  """List all current user pending notifications"""
29
17
  user = current_user._get_current_object()
30
- return get_notifications(user)
18
+ notifications = Notification.objects(user=user)
19
+ return Notification.apply_pagination(Notification.apply_sort_filters(notifications))
udata/features/notifications/models.py ADDED
@@ -0,0 +1,56 @@
1
+ from flask_restx.inputs import boolean
2
+ from mongoengine import NULLIFY
3
+
4
+ from udata.api_fields import field, generate_fields
5
+ from udata.core.organization.notifications import MembershipRequestNotificationDetails
6
+ from udata.core.user.api_fields import user_ref_fields
7
+ from udata.core.user.models import User
8
+ from udata.models import db
9
+ from udata.mongo.datetime_fields import Datetimed
10
+ from udata.mongo.queryset import UDataQuerySet
11
+
12
+
13
+ class NotificationQuerySet(UDataQuerySet):
14
+ def with_organization_in_details(self, organization):
15
+ """This function must be updated to handle new details cases"""
16
+ return self(details__request_organization=organization)
17
+
18
+ def with_user_in_details(self, user):
19
+ """This function must be updated to handle new details cases"""
20
+ return self(details__request_user=user)
21
+
22
+
23
+ def is_handled(base_query, filter_value):
24
+ if filter_value is None:
25
+ return base_query
26
+ if filter_value is True:
27
+ return base_query.filter(handled_at__ne=None)
28
+ return base_query.filter(handled_at=None)
29
+
30
+
31
+ @generate_fields()
32
+ class Notification(Datetimed, db.Document):
33
+ meta = {
34
+ "ordering": ["-created_at"],
35
+ "queryset_class": NotificationQuerySet,
36
+ }
37
+
38
+ id = field(db.AutoUUIDField(primary_key=True))
39
+ handled_at = field(
40
+ db.DateTimeField(),
41
+ sortable=True,
42
+ auditable=False,
43
+ filterable={"key": "handled", "query": is_handled, "type": boolean},
44
+ )
45
+ user = field(
46
+ db.ReferenceField(User, reverse_delete_rule=NULLIFY),
47
+ nested_fields=user_ref_fields,
48
+ readonly=True,
49
+ allow_null=True,
50
+ auditable=False,
51
+ filterable={},
52
+ )
53
+ details = field(
54
+ db.GenericEmbeddedDocumentField(choices=(MembershipRequestNotificationDetails,)),
55
+ generic=True,
56
+ )
udata/features/notifications/tasks.py ADDED
@@ -0,0 +1,25 @@
1
+ import logging
2
+ from datetime import datetime, timedelta
3
+
4
+ from flask import current_app
5
+
6
+ from udata.features.notifications.models import Notification
7
+ from udata.tasks import job
8
+
9
+ log = logging.getLogger(__name__)
10
+
11
+
12
+ @job("delete-expired-notifications")
13
+ def delete_expired_notifications(self):
14
+ # Delete expired notifications
15
+ handled_at = datetime.utcnow() - timedelta(
16
+ days=current_app.config["DAYS_AFTER_NOTIFICATION_EXPIRED"]
17
+ )
18
+ notifications_to_delete = Notification.objects(
19
+ handled_at__lte=handled_at,
20
+ )
21
+ count = notifications_to_delete.count()
22
+ for notification in notifications_to_delete:
23
+ notification.delete()
24
+
25
+ log.info(f"Deleted {count} expired notifications")
udata/flask_mongoengine/engine.py CHANGED
@@ -7,7 +7,6 @@ from mongoengine.errors import DoesNotExist
7
7
  from mongoengine.queryset import QuerySet
8
8
 
9
9
  from .connection import create_connections
10
- from .json import override_json_encoder
11
10
  from .pagination import ListFieldPagination, Pagination
12
11
  from .wtf import WtfBaseField
13
12
 
@@ -108,9 +107,6 @@ class MongoEngine(object):
108
107
 
109
108
  app.extensions = getattr(app, "extensions", {})
110
109
 
111
- # Make documents JSON serializable
112
- override_json_encoder(app)
113
-
114
110
  if "mongoengine" not in app.extensions:
115
111
  app.extensions["mongoengine"] = {}
116
112
 
udata/flask_mongoengine/pagination.py CHANGED
@@ -6,7 +6,7 @@ from mongoengine.queryset import QuerySet
6
6
 
7
7
  class Pagination(object):
8
8
  def __init__(self, iterable, page, per_page):
9
- if page < 1:
9
+ if page < 1 or per_page < 1:
10
10
  abort(404)
11
11
 
12
12
  self.iterable = iterable
udata/frontend/markdown.py CHANGED
@@ -7,8 +7,9 @@ import html2text
7
7
  import mistune
8
8
  from bleach.css_sanitizer import CSSSanitizer
9
9
  from bleach.linkifier import LinkifyFilter
10
- from flask import Markup, current_app, request
10
+ from flask import current_app, request
11
11
  from jinja2.filters import do_striptags, do_truncate
12
+ from markupsafe import Markup
12
13
  from werkzeug.local import LocalProxy
13
14
 
14
15
  from udata.i18n import _
udata/harvest/actions.py CHANGED
@@ -254,7 +254,7 @@ def schedule(
254
254
  source.modify(
255
255
  periodic_task=PeriodicTask.objects.create(
256
256
  task="harvest",
257
- name="Harvest {0}".format(source.name),
257
+ name=f"Harvest {source.name} ({source.id})",
258
258
  description="Periodic Harvesting",
259
259
  enabled=True,
260
260
  args=[str(source.id)],
@@ -317,3 +317,23 @@ def attach(domain, filename):
317
317
  count += 1
318
318
 
319
319
  return AttachResult(count, errors)
320
+
321
+
322
+ def detach(dataset: Dataset):
323
+ """Detach a dataset from its harvest source
324
+
325
+ The dataset will be cleaned from harvested information
326
+ and will no longer be updated or archived by harvesting.
327
+ """
328
+ dataset.harvest = None
329
+ for resource in dataset.resources:
330
+ resource.harvest = None
331
+ dataset.save()
332
+
333
+
334
+ def detach_all_from_source(source: HarvestSource):
335
+ """Detach all datasets linked to a harvest source"""
336
+ datasets = Dataset.objects.filter(harvest__source_id=str(source.id))
337
+ for dataset in datasets:
338
+ detach(dataset)
339
+ return len(datasets)
udata/harvest/api.py CHANGED
@@ -6,7 +6,6 @@ from udata.api import API, api, fields
6
6
  from udata.auth import admin_permission
7
7
  from udata.core.dataservices.models import Dataservice
8
8
  from udata.core.dataset.api_fields import dataset_fields, dataset_ref_fields
9
- from udata.core.dataset.permissions import OwnablePermission
10
9
  from udata.core.organization.api_fields import org_ref_fields
11
10
  from udata.core.organization.permissions import EditOrganizationPermission
12
11
  from udata.core.user.api_fields import user_ref_fields
@@ -55,6 +54,7 @@ item_fields = api.model(
55
54
  "HarvestItem",
56
55
  {
57
56
  "remote_id": fields.String(description="The item remote ID to process", required=True),
57
+ "remote_url": fields.String(description="The item remote url (if available)"),
58
58
  "dataset": fields.Nested(
59
59
  dataset_ref_fields, description="The processed dataset", allow_null=True
60
60
  ),
@@ -115,6 +115,18 @@ validation_fields = api.model(
115
115
  },
116
116
  )
117
117
 
118
+ source_permissions_fields = api.model(
119
+ "HarvestSourcePermissions",
120
+ {
121
+ "edit": fields.Permission(),
122
+ "delete": fields.Permission(),
123
+ "run": fields.Permission(),
124
+ "preview": fields.Permission(),
125
+ "validate": fields.Permission(),
126
+ "schedule": fields.Permission(),
127
+ },
128
+ )
129
+
118
130
  source_fields = api.model(
119
131
  "HarvestSource",
120
132
  {
@@ -153,6 +165,7 @@ source_fields = api.model(
153
165
  "schedule": fields.String(
154
166
  description="The source schedule (interval or cron expression)", readonly=True
155
167
  ),
168
+ "permissions": fields.Nested(source_permissions_fields, readonly=True),
156
169
  },
157
170
  )
158
171
 
@@ -313,7 +326,7 @@ class SourceAPI(API):
313
326
  @api.marshal_with(source_fields)
314
327
  def put(self, source: HarvestSource):
315
328
  """Update a harvest source"""
316
- OwnablePermission(source).test()
329
+ source.permissions["edit"].test()
317
330
  form = api.validate(HarvestSourceForm, source)
318
331
  source = actions.update_source(source, form.data)
319
332
  return source
@@ -322,18 +335,19 @@ class SourceAPI(API):
322
335
  @api.doc("delete_harvest_source")
323
336
  @api.marshal_with(source_fields)
324
337
  def delete(self, source: HarvestSource):
325
- OwnablePermission(source).test()
338
+ source.permissions["delete"].test()
326
339
  return actions.delete_source(source), 204
327
340
 
328
341
 
329
342
  @ns.route("/source/<harvest_source:source>/validate/", endpoint="validate_harvest_source")
330
343
  class ValidateSourceAPI(API):
331
344
  @api.doc("validate_harvest_source")
332
- @api.secure(admin_permission)
345
+ @api.secure
333
346
  @api.expect(validation_fields)
334
347
  @api.marshal_with(source_fields)
335
348
  def post(self, source: HarvestSource):
336
349
  """Validate or reject an harvest source"""
350
+ source.permissions["validate"].test()
337
351
  form = api.validate(HarvestSourceValidationForm)
338
352
  if form.state.data == VALIDATION_ACCEPTED:
339
353
  return actions.validate_source(source, form.comment.data)
@@ -354,7 +368,7 @@ class RunSourceAPI(API):
354
368
  "Cannot run source manually. Please contact the platform if you need to reschedule the harvester.",
355
369
  )
356
370
 
357
- OwnablePermission(source).test()
371
+ source.permissions["run"].test()
358
372
 
359
373
  if source.validation.state != VALIDATION_ACCEPTED:
360
374
  api.abort(400, "Source is not validated. Please validate the source before running.")
@@ -367,11 +381,12 @@ class RunSourceAPI(API):
367
381
  @ns.route("/source/<harvest_source:source>/schedule/", endpoint="schedule_harvest_source")
368
382
  class ScheduleSourceAPI(API):
369
383
  @api.doc("schedule_harvest_source")
370
- @api.secure(admin_permission)
384
+ @api.secure
371
385
  @api.expect((str, "A cron expression"))
372
386
  @api.marshal_with(source_fields)
373
387
  def post(self, source: HarvestSource):
374
388
  """Schedule an harvest source"""
389
+ source.permissions["schedule"].test()
375
390
  # Handle both syntax: quoted and unquoted
376
391
  try:
377
392
  data = request.json
@@ -380,10 +395,11 @@ class ScheduleSourceAPI(API):
380
395
  return actions.schedule(source, data)
381
396
 
382
397
  @api.doc("unschedule_harvest_source")
383
- @api.secure(admin_permission)
398
+ @api.secure
384
399
  @api.marshal_with(source_fields)
385
400
  def delete(self, source: HarvestSource):
386
401
  """Unschedule an harvest source"""
402
+ source.permissions["schedule"].test()
387
403
  return actions.unschedule(source), 204
388
404
 
389
405
 
@@ -408,6 +424,7 @@ class PreviewSourceAPI(API):
408
424
  @api.marshal_with(preview_job_fields)
409
425
  def get(self, source: HarvestSource):
410
426
  """Preview a single harvest source given an ID or a slug"""
427
+ source.permissions["preview"].test()
411
428
  return actions.preview(source)
412
429
 
413
430
 
@@ -437,7 +454,7 @@ class JobAPI(API):
437
454
  @api.expect(parser)
438
455
  @api.marshal_with(job_fields)
439
456
  def get(self, ident):
440
- """List all jobs for a given source"""
457
+ """Get a single job given an ID"""
441
458
  return actions.get_job(ident)
442
459
 
443
460
 
udata/harvest/backends/base.py CHANGED
@@ -166,6 +166,7 @@ class BaseBackend(object):
166
166
  log.debug(f"Starting harvesting {self.source.name} ({self.source.url})…")
167
167
  factory = HarvestJob if self.dryrun else HarvestJob.objects.create
168
168
  self.job = factory(status="initialized", started=datetime.utcnow(), source=self.source)
169
+ self.remote_ids = set()
169
170
 
170
171
  before_harvest_job.send(self)
171
172
  # Set harvest_activity_user on global context during the run
@@ -190,6 +191,7 @@ class BaseBackend(object):
190
191
 
191
192
  if any(i.status == "failed" for i in self.job.items):
192
193
  self.job.status += "-errors"
194
+
193
195
  except HarvestValidationError as e:
194
196
  log.exception(
195
197
  f'Harvesting validation failed for "{safe_unicode(self.source.name)}" ({self.source.backend})'
@@ -199,6 +201,15 @@ class BaseBackend(object):
199
201
 
200
202
  error = HarvestError(message=safe_unicode(e))
201
203
  self.job.errors.append(error)
204
+ except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as e:
205
+ log.warning(
206
+ f'Harvesting connection error for "{safe_unicode(self.source.name)}" ({self.source.backend}): {e}'
207
+ )
208
+
209
+ self.job.status = "failed"
210
+
211
+ error = HarvestError(message=safe_unicode(e), details=traceback.format_exc())
212
+ self.job.errors.append(error)
202
213
  except Exception as e:
203
214
  log.exception(
204
215
  f'Harvesting failed for "{safe_unicode(self.source.name)}" ({self.source.backend})'
@@ -232,8 +243,13 @@ class BaseBackend(object):
232
243
 
233
244
  current_app.logger.addHandler(log_catcher)
234
245
  dataset = self.inner_process_dataset(item, **kwargs)
246
+ if dataset.harvest:
247
+ item.remote_url = dataset.harvest.remote_url
248
+
249
+ # Use `item.remote_id` from this point, because `inner_process_dataset` could have modified it.
250
+
251
+ self.ensure_unique_remote_id(item)
235
252
 
236
- # Use `item.remote_id` because `inner_process_dataset` could have modified it.
237
253
  dataset.harvest = self.update_dataset_harvest_info(dataset.harvest, item.remote_id)
238
254
  dataset.archived = None
239
255
 
@@ -291,6 +307,10 @@ class BaseBackend(object):
291
307
  raise HarvestSkipException("missing identifier")
292
308
 
293
309
  dataservice = self.inner_process_dataservice(item, **kwargs)
310
+ if dataservice.harvest:
311
+ item.remote_url = dataservice.harvest.remote_url
312
+
313
+ self.ensure_unique_remote_id(item)
294
314
 
295
315
  dataservice.harvest = self.update_dataservice_harvest_info(
296
316
  dataservice.harvest, remote_id
@@ -325,6 +345,12 @@ class BaseBackend(object):
325
345
  item.ended = datetime.utcnow()
326
346
  self.save_job()
327
347
 
348
+ def ensure_unique_remote_id(self, item):
349
+ if item.remote_id in self.remote_ids:
350
+ raise HarvestValidationError(f"Identifier '{item.remote_id}' already exists")
351
+
352
+ self.remote_ids.add(item.remote_id)
353
+
328
354
  def update_dataset_harvest_info(self, harvest: HarvestDatasetMetadata | None, remote_id: int):
329
355
  if not harvest:
330
356
  harvest = HarvestDatasetMetadata()
udata/harvest/backends/ckan/harvesters.py CHANGED
@@ -173,7 +173,10 @@ class CkanBackend(BaseBackend):
173
173
  continue
174
174
  elif key == "spatial":
175
175
  # GeoJSON representation (Polygon or Point)
176
- spatial_geom = json.loads(value)
176
+ if isinstance(value, dict):
177
+ spatial_geom = value
178
+ else:
179
+ spatial_geom = json.loads(value)
177
180
  elif key == "spatial-text":
178
181
  # Textual representation of the extent / location
179
182
  qs = GeoZone.objects(db.Q(name=value) | db.Q(slug=value))
@@ -213,12 +216,17 @@ class CkanBackend(BaseBackend):
213
216
  dataset.spatial.zones = [spatial_zone]
214
217
 
215
218
  if spatial_geom:
219
+ if "type" not in spatial_geom:
220
+ raise HarvestException(f"Spatial geometry {spatial_geom} without `type`")
221
+
216
222
  if spatial_geom["type"] == "Polygon":
217
223
  coordinates = [spatial_geom["coordinates"]]
218
224
  elif spatial_geom["type"] == "MultiPolygon":
219
225
  coordinates = spatial_geom["coordinates"]
220
226
  else:
221
- raise HarvestException("Unsupported spatial geometry")
227
+ raise HarvestException(
228
+ f"Unsupported spatial geometry {spatial_geom['type']} in {spatial_geom}. (Supported types are `Polygon` and `MultiPolygon`)"
229
+ )
222
230
  dataset.spatial.geom = {"type": "MultiPolygon", "coordinates": coordinates}
223
231
 
224
232
  if temporal_start and temporal_end:
@@ -267,5 +275,6 @@ class CkanBackend(BaseBackend):
267
275
 
268
276
  class DkanBackend(CkanBackend):
269
277
  name = "dkan"
278
+ display_name = "DKAN"
270
279
  schema = dkan_schema
271
280
  filters = []
udata/harvest/backends/dcat.py CHANGED
@@ -225,7 +225,9 @@ class DcatBackend(BaseBackend):
225
225
 
226
226
  dataset = self.get_dataset(item.remote_id)
227
227
  remote_url_prefix = self.get_extra_config_value("remote_url_prefix")
228
- dataset = dataset_from_rdf(page, dataset, node=node, remote_url_prefix=remote_url_prefix)
228
+ dataset = dataset_from_rdf(
229
+ page, dataset, node=node, remote_url_prefix=remote_url_prefix, dryrun=self.dryrun
230
+ )
229
231
  if dataset.organization:
230
232
  dataset.organization.compute_aggregate_metrics = False
231
233
  self.organizations_to_update.add(dataset.organization)
@@ -242,6 +244,7 @@ class DcatBackend(BaseBackend):
242
244
  node,
243
245
  [item.dataset for item in self.job.items],
244
246
  remote_url_prefix=remote_url_prefix,
247
+ dryrun=self.dryrun,
245
248
  )
246
249
 
247
250
  def get_node_from_item(self, graph, item):
udata/harvest/commands.py CHANGED
@@ -4,6 +4,7 @@ import click
4
4
 
5
5
  from udata.commands import KO, OK, cli, green, red
6
6
  from udata.harvest.backends import get_all_backends, is_backend_enabled
7
+ from udata.models import Dataset
7
8
 
8
9
  from . import actions
9
10
 
@@ -156,3 +157,35 @@ def attach(domain, filename):
156
157
  log.info("Attaching datasets for domain %s", domain)
157
158
  result = actions.attach(domain, filename)
158
159
  log.info("Attached %s datasets to %s", result.success, domain)
160
+
161
+
162
+ @grp.command()
163
+ @click.argument("dataset_id")
164
+ def detach(dataset_id):
165
+ """
166
+ Detach a dataset_id from its harvest source
167
+
168
+ The dataset will be cleaned from harvested information
169
+ """
170
+ log.info(f"Detaching dataset {dataset_id}")
171
+ dataset = Dataset.get(dataset_id)
172
+ actions.detach(dataset)
173
+ log.info("Done")
174
+
175
+
176
+ @grp.command()
177
+ @click.argument("identifier")
178
+ def detach_all_from_source(identifier):
179
+ """
180
+ Detach all datasets from a harvest source
181
+
182
+ All the datasets will be cleaned from harvested information.
183
+ Make sure the harvest source won't create new duplicate datasets,
184
+ either by deactivating it or filtering its scope, etc.
185
+ """
186
+ log.info(f"Detaching datasets from harvest source {identifier}")
187
+ count = actions.detach_all_from_source(actions.get_source(identifier))
188
+ log.info(f"Detached {count} datasets")
189
+ log.warning(
190
+ "Make sure the harvest source won't create new duplicate datasets, either by deactivating it or filtering its scope, etc."
191
+ )
udata/harvest/filters.py CHANGED
@@ -3,6 +3,9 @@ from voluptuous import Invalid
3
3
 
4
4
  from udata import tags, uris
5
5
 
6
+ TRUTHY_STRINGS = ("on", "t", "true", "y", "yes", "1")
7
+ FALSY_STRINGS = ("f", "false", "n", "no", "off", "0")
8
+
6
9
 
7
10
  def boolean(value):
8
11
  """
@@ -15,17 +18,25 @@ def boolean(value):
15
18
  if value is None or isinstance(value, bool):
16
19
  return value
17
20
 
18
- try:
19
- return bool(int(value))
20
- except ValueError:
21
+ if isinstance(value, int):
22
+ return bool(value)
23
+
24
+ if isinstance(value, str):
21
25
  lower_value = value.strip().lower()
26
+
22
27
  if not lower_value:
23
28
  return None
24
- if lower_value in ("f", "false", "n", "no", "off"):
29
+ if lower_value in FALSY_STRINGS:
25
30
  return False
26
- if lower_value in ("on", "t", "true", "y", "yes"):
31
+ if lower_value in TRUTHY_STRINGS:
27
32
  return True
28
- raise Invalid("Unable to parse boolean {0}".format(value))
33
+ raise Invalid(
34
+ f"Unable to parse string '{value}' as boolean. Supported values are {','.join(TRUTHY_STRINGS)} for `True` and {','.join(FALSY_STRINGS)} for `False`."
35
+ )
36
+
37
+ raise Invalid(
38
+ f"Cannot convert value {value} of type {type(value)} to boolean. Supported types are `bool`, `int` and `str`"
39
+ )
29
40
 
30
41
 
31
42
  def to_date(value):