udata 14.0.3.dev1__py3-none-any.whl → 14.7.3.dev4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (151)
  1. udata/api/__init__.py +2 -0
  2. udata/api_fields.py +120 -19
  3. udata/app.py +18 -20
  4. udata/auth/__init__.py +4 -7
  5. udata/auth/forms.py +3 -3
  6. udata/auth/views.py +13 -6
  7. udata/commands/dcat.py +1 -1
  8. udata/commands/serve.py +3 -11
  9. udata/core/activity/api.py +5 -6
  10. udata/core/badges/tests/test_tasks.py +0 -2
  11. udata/core/csv.py +5 -0
  12. udata/core/dataservices/api.py +8 -1
  13. udata/core/dataservices/apiv2.py +3 -6
  14. udata/core/dataservices/models.py +5 -2
  15. udata/core/dataservices/rdf.py +2 -1
  16. udata/core/dataservices/tasks.py +6 -2
  17. udata/core/dataset/api.py +30 -4
  18. udata/core/dataset/api_fields.py +1 -1
  19. udata/core/dataset/apiv2.py +1 -1
  20. udata/core/dataset/constants.py +2 -9
  21. udata/core/dataset/models.py +21 -9
  22. udata/core/dataset/permissions.py +31 -0
  23. udata/core/dataset/rdf.py +18 -16
  24. udata/core/dataset/tasks.py +16 -7
  25. udata/core/discussions/api.py +15 -1
  26. udata/core/discussions/models.py +6 -0
  27. udata/core/legal/__init__.py +0 -0
  28. udata/core/legal/mails.py +128 -0
  29. udata/core/organization/api.py +16 -5
  30. udata/core/organization/api_fields.py +3 -3
  31. udata/core/organization/apiv2.py +3 -4
  32. udata/core/organization/mails.py +1 -1
  33. udata/core/organization/models.py +40 -7
  34. udata/core/organization/notifications.py +84 -0
  35. udata/core/organization/permissions.py +1 -1
  36. udata/core/organization/tasks.py +3 -0
  37. udata/core/pages/models.py +49 -0
  38. udata/core/pages/tests/test_api.py +165 -1
  39. udata/core/post/api.py +25 -70
  40. udata/core/post/constants.py +8 -0
  41. udata/core/post/models.py +109 -17
  42. udata/core/post/tests/test_api.py +140 -3
  43. udata/core/post/tests/test_models.py +24 -0
  44. udata/core/reports/api.py +18 -0
  45. udata/core/reports/models.py +42 -2
  46. udata/core/reuse/api.py +8 -0
  47. udata/core/reuse/apiv2.py +3 -6
  48. udata/core/reuse/models.py +1 -1
  49. udata/core/spatial/forms.py +2 -2
  50. udata/core/topic/models.py +8 -2
  51. udata/core/user/api.py +10 -3
  52. udata/core/user/api_fields.py +3 -3
  53. udata/core/user/models.py +33 -8
  54. udata/features/notifications/api.py +7 -18
  55. udata/features/notifications/models.py +59 -0
  56. udata/features/notifications/tasks.py +25 -0
  57. udata/features/transfer/actions.py +2 -0
  58. udata/features/transfer/models.py +17 -0
  59. udata/features/transfer/notifications.py +96 -0
  60. udata/flask_mongoengine/engine.py +0 -4
  61. udata/flask_mongoengine/pagination.py +1 -1
  62. udata/frontend/markdown.py +2 -1
  63. udata/harvest/actions.py +20 -0
  64. udata/harvest/api.py +24 -7
  65. udata/harvest/backends/base.py +27 -1
  66. udata/harvest/backends/ckan/harvesters.py +21 -4
  67. udata/harvest/backends/dcat.py +4 -1
  68. udata/harvest/commands.py +33 -0
  69. udata/harvest/filters.py +17 -6
  70. udata/harvest/models.py +16 -0
  71. udata/harvest/permissions.py +27 -0
  72. udata/harvest/tests/ckan/test_ckan_backend.py +33 -0
  73. udata/harvest/tests/test_actions.py +46 -2
  74. udata/harvest/tests/test_api.py +161 -6
  75. udata/harvest/tests/test_base_backend.py +86 -1
  76. udata/harvest/tests/test_dcat_backend.py +68 -3
  77. udata/harvest/tests/test_filters.py +6 -0
  78. udata/i18n.py +1 -4
  79. udata/mail.py +14 -0
  80. udata/migrations/2021-08-17-harvest-integrity.py +23 -16
  81. udata/migrations/2025-10-31-create-membership-request-notifications.py +55 -0
  82. udata/migrations/2025-12-04-add-uuid-to-discussion-messages.py +28 -0
  83. udata/migrations/2025-12-16-create-transfer-request-notifications.py +69 -0
  84. udata/migrations/2026-01-14-add-default-kind-to-posts.py +17 -0
  85. udata/mongo/slug_fields.py +1 -1
  86. udata/rdf.py +65 -11
  87. udata/routing.py +2 -2
  88. udata/settings.py +11 -0
  89. udata/tasks.py +2 -0
  90. udata/templates/mail/message.html +3 -1
  91. udata/tests/api/__init__.py +7 -17
  92. udata/tests/api/test_activities_api.py +36 -0
  93. udata/tests/api/test_datasets_api.py +69 -0
  94. udata/tests/api/test_organizations_api.py +0 -3
  95. udata/tests/api/test_reports_api.py +157 -0
  96. udata/tests/api/test_user_api.py +1 -1
  97. udata/tests/apiv2/test_dataservices.py +14 -0
  98. udata/tests/apiv2/test_organizations.py +9 -0
  99. udata/tests/apiv2/test_reuses.py +11 -0
  100. udata/tests/cli/test_cli_base.py +0 -1
  101. udata/tests/dataservice/test_dataservice_tasks.py +29 -0
  102. udata/tests/dataset/test_dataset_model.py +13 -1
  103. udata/tests/dataset/test_dataset_rdf.py +164 -5
  104. udata/tests/dataset/test_dataset_tasks.py +25 -0
  105. udata/tests/frontend/test_auth.py +58 -1
  106. udata/tests/frontend/test_csv.py +0 -3
  107. udata/tests/helpers.py +31 -27
  108. udata/tests/organization/test_notifications.py +67 -2
  109. udata/tests/search/test_search_integration.py +70 -0
  110. udata/tests/site/test_site_csv_exports.py +22 -10
  111. udata/tests/test_activity.py +9 -9
  112. udata/tests/test_api_fields.py +10 -0
  113. udata/tests/test_discussions.py +5 -5
  114. udata/tests/test_legal_mails.py +359 -0
  115. udata/tests/test_notifications.py +15 -57
  116. udata/tests/test_notifications_task.py +43 -0
  117. udata/tests/test_owned.py +81 -1
  118. udata/tests/test_transfer.py +181 -2
  119. udata/tests/test_uris.py +33 -0
  120. udata/translations/ar/LC_MESSAGES/udata.mo +0 -0
  121. udata/translations/ar/LC_MESSAGES/udata.po +309 -158
  122. udata/translations/de/LC_MESSAGES/udata.mo +0 -0
  123. udata/translations/de/LC_MESSAGES/udata.po +313 -160
  124. udata/translations/es/LC_MESSAGES/udata.mo +0 -0
  125. udata/translations/es/LC_MESSAGES/udata.po +312 -160
  126. udata/translations/fr/LC_MESSAGES/udata.mo +0 -0
  127. udata/translations/fr/LC_MESSAGES/udata.po +475 -202
  128. udata/translations/it/LC_MESSAGES/udata.mo +0 -0
  129. udata/translations/it/LC_MESSAGES/udata.po +317 -162
  130. udata/translations/pt/LC_MESSAGES/udata.mo +0 -0
  131. udata/translations/pt/LC_MESSAGES/udata.po +315 -161
  132. udata/translations/sr/LC_MESSAGES/udata.mo +0 -0
  133. udata/translations/sr/LC_MESSAGES/udata.po +323 -164
  134. udata/translations/udata.pot +169 -124
  135. udata/uris.py +0 -2
  136. udata/utils.py +23 -0
  137. udata-14.7.3.dev4.dist-info/METADATA +109 -0
  138. {udata-14.0.3.dev1.dist-info → udata-14.7.3.dev4.dist-info}/RECORD +142 -135
  139. udata/core/post/forms.py +0 -30
  140. udata/flask_mongoengine/json.py +0 -38
  141. udata/templates/mail/base.html +0 -105
  142. udata/templates/mail/base.txt +0 -6
  143. udata/templates/mail/button.html +0 -3
  144. udata/templates/mail/layouts/1-column.html +0 -19
  145. udata/templates/mail/layouts/2-columns.html +0 -20
  146. udata/templates/mail/layouts/center-panel.html +0 -16
  147. udata-14.0.3.dev1.dist-info/METADATA +0 -132
  148. {udata-14.0.3.dev1.dist-info → udata-14.7.3.dev4.dist-info}/WHEEL +0 -0
  149. {udata-14.0.3.dev1.dist-info → udata-14.7.3.dev4.dist-info}/entry_points.txt +0 -0
  150. {udata-14.0.3.dev1.dist-info → udata-14.7.3.dev4.dist-info}/licenses/LICENSE +0 -0
  151. {udata-14.0.3.dev1.dist-info → udata-14.7.3.dev4.dist-info}/top_level.txt +0 -0
udata/core/dataservices/tasks.py CHANGED
@@ -23,8 +23,12 @@ def purge_dataservices(self):
  Follow.objects(following=dataservice).delete()
  # Remove discussions
  Discussion.objects(subject=dataservice).delete()
- # Remove HarvestItem references
- HarvestJob.objects(items__dataservice=dataservice).update(set__items__S__dataservice=None)
+ # Remove HarvestItem references (using update_many with array_filters to update all matching items)
+ HarvestJob._get_collection().update_many(
+ {"items.dataservice": dataservice.id},
+ {"$set": {"items.$[item].dataservice": None}},
+ array_filters=[{"item.dataservice": dataservice.id}],
+ )
  # Remove associated Transfers
  Transfer.objects(subject=dataservice).delete()
  # Remove dataservices references in Topics
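
The replacement above relies on MongoDB's filtered positional operator. A minimal standalone sketch of the same pattern with plain pymongo (database, collection, and id are illustrative, not udata code):

from bson import ObjectId
from pymongo import MongoClient

client = MongoClient()  # assumes a local MongoDB instance for the example
jobs = client["example_db"]["harvest_job"]  # hypothetical collection name
dataservice_id = ObjectId()  # placeholder id

# Clear the reference inside *every* matching embedded `items` entry, not just
# the first match (which is all the plain positional `$` operator can reach).
jobs.update_many(
    {"items.dataservice": dataservice_id},
    {"$set": {"items.$[item].dataservice": None}},
    array_filters=[{"item.dataservice": dataservice_id}],
)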
udata/core/dataset/api.py CHANGED
@@ -39,6 +39,7 @@ from udata.core.dataservices.models import Dataservice
  from udata.core.dataset.models import CHECKSUM_TYPES
  from udata.core.followers.api import FollowAPI
  from udata.core.followers.models import Follow
+ from udata.core.legal.mails import add_send_legal_notice_argument, send_legal_notice_on_deletion
  from udata.core.organization.models import Organization
  from udata.core.reuse.models import Reuse
  from udata.core.storages.api import handle_upload, upload_parser
@@ -327,17 +328,33 @@ class DatasetListAPI(API):
  @ns.route("/recent.atom", endpoint="recent_datasets_atom_feed")
  class DatasetsAtomFeedAPI(API):
  @api.doc("recent_datasets_atom_feed")
+ @api.expect(dataset_parser.parser)
  def get(self):
+ args = dataset_parser.parse()
+ queryset = Dataset.objects.visible()
+ queryset = DatasetApiParser.parse_filters(queryset, args)
+
+ q = args.get("q").strip() if args.get("q") else ""
+ has_filters = any(
+ args.get(k)
+ for k in ["q", "tag", "license", "organization", "owner", "format", "badge", "topic"]
+ )
+
+ if q:
+ title = _("Datasets search: {q}").format(q=q)
+ elif has_filters:
+ title = _("Filtered datasets")
+ else:
+ title = _("Latest datasets")
+
  feed = Atom1Feed(
- _("Latest datasets"),
+ title,
  description=None,
  feed_url=request.url,
  link=request.url_root,
  )

- datasets: list[Dataset] = get_rss_feed_list(
- Dataset.objects.visible(), "created_at_internal"
- )
+ datasets: list[Dataset] = get_rss_feed_list(queryset, "created_at_internal")

  for dataset in datasets:
  author_name = None
@@ -364,6 +381,9 @@ class DatasetsAtomFeedAPI(API):
  return response


+ dataset_delete_parser = add_send_legal_notice_argument(api.parser())
+
+
  @ns.route("/<dataset:dataset>/", endpoint="dataset", doc=common_doc)
  @api.response(404, "Dataset not found")
  @api.response(410, "Dataset has been deleted")
@@ -397,12 +417,16 @@ class DatasetAPI(API):

  @api.secure
  @api.doc("delete_dataset")
+ @api.expect(dataset_delete_parser)
  @api.response(204, "Dataset deleted")
  def delete(self, dataset):
  """Delete a dataset given its identifier"""
+ args = dataset_delete_parser.parse_args()
  if dataset.deleted:
  api.abort(410, "Dataset has been deleted")
  dataset.permissions["delete"].test()
+ send_legal_notice_on_deletion(dataset, args)
+
  dataset.deleted = datetime.utcnow()
  dataset.last_modified_internal = datetime.utcnow()
  dataset.save()
@@ -531,6 +555,8 @@ class ResourcesAPI(API):
  f"All resources must be reordered, you provided {len(resources)} "
  f"out of {len(dataset.resources)}",
  )
+ if any(isinstance(r, dict) and "id" not in r for r in resources):
+ api.abort(400, "Each resource must have an 'id' field")
  if set(r["id"] if isinstance(r, dict) else r for r in resources) != set(
  str(r.id) for r in dataset.resources
  ):
udata/core/dataset/api_fields.py CHANGED
@@ -332,7 +332,7 @@ dataset_fields = api.model(
  "id": fields.String(description="The dataset identifier", readonly=True),
  "title": fields.String(description="The dataset title", required=True),
  "acronym": fields.String(description="An optional dataset acronym"),
- "slug": fields.String(description="The dataset permalink string", required=True),
+ "slug": fields.String(description="The dataset permalink string", readonly=True),
  "description": fields.Markdown(
  description="The dataset description in markdown", required=True
  ),
udata/core/dataset/apiv2.py CHANGED
@@ -108,7 +108,7 @@ dataset_fields = apiv2.model(
  "id": fields.String(description="The dataset identifier", readonly=True),
  "title": fields.String(description="The dataset title", required=True),
  "acronym": fields.String(description="An optional dataset acronym"),
- "slug": fields.String(description="The dataset permalink string", required=True),
+ "slug": fields.String(description="The dataset permalink string", readonly=True),
  "description": fields.Markdown(
  description="The dataset description in markdown", required=True
  ),
udata/core/dataset/constants.py CHANGED
@@ -1,5 +1,5 @@
  from collections import OrderedDict
- from datetime import date, datetime, timedelta
+ from datetime import datetime, timedelta
  from enum import StrEnum, auto

  from flask_babel import LazyString
@@ -100,14 +100,7 @@ class UpdateFrequency(StrEnum):
  return self._delta # type: ignore[misc]

  def next_update(self, last_update: datetime) -> datetime | None:
- if not self.delta:
- return None
- result = last_update + self.delta
- # Convert datetime.date to datetime.datetime for BSON compatibility
- # MongoDB/BSON cannot encode datetime.date objects, only datetime.datetime
- if isinstance(result, date) and not isinstance(result, datetime):
- result = datetime.combine(result, datetime.min.time())
- return result
+ return last_update + self.delta if self.delta else None


  # We must declare UpdateFrequency class variables after the Enum magic
udata/core/dataset/models.py CHANGED
@@ -15,17 +15,19 @@ from mongoengine.fields import DateTimeField
  from mongoengine.signals import post_save, pre_init, pre_save
  from werkzeug.utils import cached_property

- from udata.api_fields import field
+ from udata.api_fields import field, generate_fields
  from udata.app import cache
  from udata.core import storages
  from udata.core.access_type.constants import AccessType
  from udata.core.access_type.models import WithAccessType, check_only_one_condition_per_role
  from udata.core.activity.models import Auditable
  from udata.core.constants import HVD
+ from udata.core.dataset.api_fields import temporal_coverage_fields
  from udata.core.dataset.preview import TabularAPIPreview
  from udata.core.linkable import Linkable
  from udata.core.metrics.helpers import get_stock_metrics
  from udata.core.owned import Owned, OwnedQuerySet
+ from udata.core.spatial.api_fields import spatial_coverage_fields
  from udata.frontend.markdown import mdstrip
  from udata.i18n import lazy_gettext as _
  from udata.models import Badge, BadgeMixin, BadgesList, SpatialCoverage, WithMetrics, db
@@ -89,6 +91,7 @@ def get_json_ld_extra(key, value):
  }


+ @generate_fields()
  class HarvestDatasetMetadata(db.EmbeddedDocument):
  backend = db.StringField()
  created_at = db.DateTimeField()
@@ -114,6 +117,7 @@ class HarvestResourceMetadata(db.EmbeddedDocument):
  dct_identifier = db.StringField()


+ @generate_fields()
  class Schema(db.EmbeddedDocument):
  """
  Schema can only be two things right now:
@@ -482,6 +486,7 @@ class ResourceMixin(object):
  return result


+ @generate_fields()
  class Resource(ResourceMixin, WithMetrics, db.EmbeddedDocument):
  """
  Local file, remote file or API provided by the original provider of the
@@ -533,6 +538,7 @@ class DatasetBadgeMixin(BadgeMixin):
  __badges__ = BADGES


+ @generate_fields()
  class Dataset(
  Auditable, WithMetrics, WithAccessType, DatasetBadgeMixin, Owned, Linkable, db.Document
  ):
@@ -546,7 +552,10 @@ class Dataset(
  ),
  auditable=False,
  )
- description = field(db.StringField(required=True, default=""))
+ description = field(
+ db.StringField(required=True, default=""),
+ markdown=True,
+ )
  description_short = field(db.StringField(max_length=DESCRIPTION_SHORT_SIZE_LIMIT))
  license = field(db.ReferenceField("License"))

@@ -557,8 +566,14 @@

  frequency = field(db.EnumField(UpdateFrequency))
  frequency_date = field(db.DateTimeField(verbose_name=_("Future date of update")))
- temporal_coverage = field(db.EmbeddedDocumentField(db.DateRange))
- spatial = field(db.EmbeddedDocumentField(SpatialCoverage))
+ temporal_coverage = field(
+ db.EmbeddedDocumentField(db.DateRange),
+ nested_fields=temporal_coverage_fields,
+ )
+ spatial = field(
+ db.EmbeddedDocumentField(SpatialCoverage),
+ nested_fields=spatial_coverage_fields,
+ )
  schema = field(db.EmbeddedDocumentField(Schema))

  ext = field(db.MapField(db.GenericEmbeddedDocumentField()), auditable=False)
@@ -730,7 +745,7 @@ class Dataset(
  }

  def self_web_url(self, **kwargs):
- return cdata_url(f"/datasets/{self._link_id(**kwargs)}/", **kwargs)
+ return cdata_url(f"/datasets/{self._link_id(**kwargs)}", **kwargs)

  def self_api_url(self, **kwargs):
  return url_for(
@@ -795,7 +810,7 @@ class Dataset(
  Resources should be fetched when calling this method.
  """
  if self.harvest and self.harvest.modified_at:
- return self.harvest.modified_at
+ return to_naive_datetime(self.harvest.modified_at)
  if self.resources:
  return max([res.last_modified for res in self.resources])
  else:
@@ -1148,9 +1163,6 @@ class ResourceSchema(object):
  except requests.exceptions.RequestException as err:
  log.exception(f"Error while getting schema catalog from {endpoint}: {err}")
  schemas = cache.get(cache_key)
- except requests.exceptions.JSONDecodeError as err:
- log.exception(f"Error while getting schema catalog from {endpoint}: {err}")
- schemas = cache.get(cache_key)
  else:
  schemas = data.get("schemas", [])
  cache.set(cache_key, schemas)
udata/core/dataset/permissions.py CHANGED
@@ -1,3 +1,6 @@
+ from flask_principal import Permission as BasePermission
+ from flask_principal import RoleNeed
+
  from udata.auth import Permission, UserNeed
  from udata.core.organization.permissions import (
  OrganizationAdminNeed,
@@ -22,6 +25,34 @@ class OwnablePermission(Permission):
  super(OwnablePermission, self).__init__(*needs)


+ class OwnableReadPermission(BasePermission):
+ """Permission to read a private ownable object.
+
+ Always grants access if the object is not private.
+ For private objects, requires owner, org member, or sysadmin.
+
+ We inherit from BasePermission instead of udata's Permission because
+ Permission automatically adds RoleNeed("admin") to all needs. This means
+ a permission with no needs would only allow admins. With BasePermission,
+ an empty needs set allows everyone (Flask-Principal returns True when
+ self.needs is empty).
+ """
+
+ def __init__(self, obj):
+ if not getattr(obj, "private", False):
+ super().__init__()
+ return
+
+ needs = [RoleNeed("admin")]
+ if obj.organization:
+ needs.append(OrganizationAdminNeed(obj.organization.id))
+ needs.append(OrganizationEditorNeed(obj.organization.id))
+ elif obj.owner:
+ needs.append(UserNeed(obj.owner.fs_uniquifier))
+
+ super().__init__(*needs)
+
+
  class DatasetEditPermission(OwnablePermission):
  """Permissions to edit a Dataset"""

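The docstring above hinges on a Flask-Principal detail: a Permission built with an empty needs set allows everyone. A small sketch outside udata illustrating that behaviour (identities and roles are illustrative):

from flask_principal import Identity, Permission, RoleNeed

public_read = Permission()                    # no needs: everyone is allowed
private_read = Permission(RoleNeed("admin"))  # only identities providing the role

anonymous = Identity("anonymous")
admin = Identity("admin-user")
admin.provides.add(RoleNeed("admin"))

assert public_read.allows(anonymous)       # empty needs set grants access
assert not private_read.allows(anonymous)  # missing RoleNeed("admin")
assert private_read.allows(admin)
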
udata/core/dataset/rdf.py CHANGED
@@ -5,7 +5,7 @@ This module centralize dataset helpers for RDF/DCAT serialization and parsing
  import calendar
  import json
  import logging
- from datetime import date, datetime
+ from datetime import date

  from dateutil.parser import parse as parse_dt
  from flask import current_app
@@ -51,7 +51,7 @@ from udata.rdf import (
  themes_from_rdf,
  url_from_rdf,
  )
- from udata.utils import get_by, safe_unicode, to_naive_datetime
+ from udata.utils import get_by, safe_harvest_datetime, safe_unicode

  from .constants import OGC_SERVICE_FORMATS, UpdateFrequency
  from .models import Checksum, Dataset, License, Resource
@@ -729,12 +729,10 @@ def resource_from_rdf(graph_or_distrib, dataset=None, is_additionnal=False):
  resource.harvest = HarvestResourceMetadata()
  resource.harvest.issued_at = issued_at

- # In the past, we've encountered future `modified_at` during harvesting
- # do not save it. :FutureHarvestModifiedAt
- if modified_at and to_naive_datetime(modified_at) > datetime.utcnow():
- log.warning(f"Future `DCT.modified` date '{modified_at}' in resource")
- else:
- resource.harvest.modified_at = modified_at
+ # :FutureHarvestModifiedAt
+ resource.harvest.modified_at = safe_harvest_datetime(
+ modified_at, "DCT.modified (resource)", refuse_future=True
+ )

  resource.harvest.dct_identifier = identifier
  resource.harvest.uri = uri
@@ -742,7 +740,13 @@ def resource_from_rdf(graph_or_distrib, dataset=None, is_additionnal=False):
  return resource


- def dataset_from_rdf(graph: Graph, dataset=None, node=None, remote_url_prefix: str | None = None):
+ def dataset_from_rdf(
+ graph: Graph,
+ dataset=None,
+ node=None,
+ remote_url_prefix: str | None = None,
+ dryrun: bool = False,
+ ):
  """
  Create or update a dataset from a RDF/DCAT graph
  """
@@ -764,7 +768,7 @@ def dataset_from_rdf(graph: Graph, dataset=None, node=None, remote_url_prefix: s
  dataset.description = sanitize_html(description)
  dataset.frequency = frequency_from_rdf(d.value(DCT.accrualPeriodicity)) or dataset.frequency
  roles = [ # Imbricated list of contact points for each role
- contact_points_from_rdf(d, rdf_entity, role, dataset)
+ contact_points_from_rdf(d, rdf_entity, role, dataset, dryrun=dryrun)
  for rdf_entity, role in CONTACT_POINT_ENTITY_TO_ROLE.items()
  ]
  dataset.contact_points = [ # Flattened list of contact points
@@ -839,12 +843,10 @@ def dataset_from_rdf(graph: Graph, dataset=None, node=None, remote_url_prefix: s
  dataset.harvest.created_at = created_at
  dataset.harvest.issued_at = issued_at

- # In the past, we've encountered future `modified_at` during harvesting
- # do not save it. :FutureHarvestModifiedAt
- if modified_at and to_naive_datetime(modified_at) > datetime.utcnow():
- log.warning(f"Future `DCT.modified` date '{modified_at}' in dataset")
- else:
- dataset.harvest.modified_at = modified_at
+ # :FutureHarvestModifiedAt
+ dataset.harvest.modified_at = safe_harvest_datetime(
+ modified_at, "DCT.modified (dataset)", refuse_future=True
+ )

  return dataset

udata/core/dataset/tasks.py CHANGED
@@ -1,4 +1,5 @@
  import collections
+ import gzip
  import os
  from datetime import date, datetime
  from tempfile import NamedTemporaryFile
@@ -53,8 +54,12 @@ def purge_datasets(self):
  datasets = dataservice.datasets
  datasets.remove(dataset)
  dataservice.update(datasets=datasets)
- # Remove HarvestItem references
- HarvestJob.objects(items__dataset=dataset).update(set__items__S__dataset=None)
+ # Remove HarvestItem references (using update_many with array_filters to update all matching items)
+ HarvestJob._get_collection().update_many(
+ {"items.dataset": dataset.id},
+ {"$set": {"items.$[item].dataset": None}},
+ array_filters=[{"item.dataset": dataset.id}],
+ )
  # Remove datasets in pages (mongoengine doesn't support updating a field in a generic embed)
  Page._get_collection().update_many(
  {"blocs.datasets": dataset.id},
@@ -94,8 +99,7 @@ def get_queryset(model_cls):
  for attr in attrs:
  if getattr(model_cls, attr, None):
  params[attr] = False
- # no_cache to avoid eating up too much RAM
- return model_cls.objects.filter(**params).no_cache()
+ return model_cls.objects.filter(**params)


  def get_resource_for_csv_export_model(model, dataset):
@@ -173,7 +177,12 @@ def export_csv_for_model(model, dataset, replace: bool = False):
  dataset.save()
  # remove previous catalog if exists and replace is True
  if replace and fs_filename_to_remove:
- storages.resources.delete(fs_filename_to_remove)
+ try:
+ storages.resources.delete(fs_filename_to_remove)
+ except FileNotFoundError:
+ log.error(
+ f"File not found while deleting resource #{resource.id} ({fs_filename_to_remove}) in export_csv_for_model cleanup"
+ )
  return resource
  finally:
  csvfile.close()
@@ -217,8 +226,8 @@ def export_csv(self, model=None):
  with storages.resources.open(resource.fs_filename, "rb") as f:
  store_bytes(
  bucket=current_app.config["EXPORT_CSV_ARCHIVE_S3_BUCKET"],
- filename=f"{current_app.config['EXPORT_CSV_ARCHIVE_S3_FILENAME_PREFIX']}{resource.title}",
- bytes=f.read(),
+ filename=f"{current_app.config['EXPORT_CSV_ARCHIVE_S3_FILENAME_PREFIX']}{resource.title}.gz",
+ bytes=gzip.compress(f.read()),
  )

udata/core/discussions/api.py CHANGED
@@ -7,6 +7,7 @@ from flask_security import current_user
  from udata.api import API, api, fields
  from udata.core.dataservices.models import Dataservice
  from udata.core.dataset.models import Dataset
+ from udata.core.legal.mails import add_send_legal_notice_argument, send_legal_notice_on_deletion
  from udata.core.organization.api_fields import org_ref_fields
  from udata.core.organization.models import Organization
  from udata.core.reuse.models import Reuse
@@ -164,6 +165,9 @@ class DiscussionSpamAPI(SpamAPIMixin):
  model = Discussion


+ discussion_delete_parser = add_send_legal_notice_argument(api.parser())
+
+
  @ns.route("/<id>/", endpoint="discussion")
  class DiscussionAPI(API):
  """
@@ -236,11 +240,14 @@ class DiscussionAPI(API):
  return discussion

  @api.doc("delete_discussion")
+ @api.expect(discussion_delete_parser)
  @api.response(403, "Not allowed to delete this discussion")
  def delete(self, id):
  """Delete a discussion given its ID"""
+ args = discussion_delete_parser.parse_args()
  discussion = Discussion.objects.get_or_404(id=id_or_404(id))
  discussion.permissions["delete"].test()
+ send_legal_notice_on_deletion(discussion, args)

  discussion.delete()
  on_discussion_deleted.send(discussion)
@@ -259,6 +266,9 @@ class DiscussionCommentSpamAPI(SpamAPIMixin):
  return discussion, discussion.discussion[cidx]


+ message_delete_parser = add_send_legal_notice_argument(api.parser())
+
+
  @ns.route("/<id>/comments/<int:cidx>/", endpoint="discussion_comment")
  class DiscussionCommentAPI(API):
  """
@@ -286,16 +296,20 @@ class DiscussionCommentAPI(API):
  return discussion

  @api.doc("delete_discussion_comment")
+ @api.expect(message_delete_parser)
  @api.response(403, "Not allowed to delete this comment")
  def delete(self, id, cidx):
  """Delete a comment given its index"""
+ args = message_delete_parser.parse_args()
  discussion = Discussion.objects.get_or_404(id=id_or_404(id))
  if len(discussion.discussion) <= cidx:
  api.abort(404, "Comment does not exist")
  elif cidx == 0:
  api.abort(400, "You cannot delete the first comment of a discussion")

- discussion.discussion[cidx].permissions["delete"].test()
+ message = discussion.discussion[cidx]
+ message.permissions["delete"].test()
+ send_legal_notice_on_deletion(message, args)

  discussion.discussion.pop(cidx)
  discussion.save()
udata/core/discussions/models.py CHANGED
@@ -6,6 +6,7 @@ from flask_login import current_user

  from udata.core.linkable import Linkable
  from udata.core.spam.models import SpamMixin, spam_protected
+ from udata.i18n import lazy_gettext as _
  from udata.mongo import db

  from .signals import on_discussion_closed, on_new_discussion, on_new_discussion_comment
@@ -14,6 +15,9 @@ log = logging.getLogger(__name__)


  class Message(SpamMixin, db.EmbeddedDocument):
+ verbose_name = _("message")
+
+ id = db.AutoUUIDField()
  content = db.StringField(required=True)
  posted_on = db.DateTimeField(default=datetime.utcnow, required=True)
  posted_by = db.ReferenceField("User")
@@ -69,6 +73,8 @@ class Message(SpamMixin, db.EmbeddedDocument):


  class Discussion(SpamMixin, Linkable, db.Document):
+ verbose_name = _("discussion")
+
  user = db.ReferenceField("User")
  organization = db.ReferenceField("Organization")

File without changes
udata/core/legal/mails.py ADDED
@@ -0,0 +1,128 @@
+ from flask import current_app
+ from flask_babel import LazyString
+ from flask_login import current_user
+ from flask_restx.inputs import boolean
+
+ from udata.core.dataservices.models import Dataservice
+ from udata.core.dataset.models import Dataset
+ from udata.core.discussions.models import Discussion, Message
+ from udata.core.organization.models import Organization
+ from udata.core.reuse.models import Reuse
+ from udata.core.user.models import User
+ from udata.i18n import lazy_gettext as _
+ from udata.mail import Link, MailMessage, ParagraphWithLinks
+
+ DeletableObject = Dataset | Reuse | Dataservice | Organization | User | Discussion | Message
+
+
+ def add_send_legal_notice_argument(parser):
+ """Add the send_legal_notice argument to a parser.
+
+ When send_legal_notice=true is passed by an admin, a formal legal notice email
+ is sent to the content owner. This email includes terms of use references and
+ information about how to contest the deletion (administrative appeal).
+ """
+ parser.add_argument(
+ "send_legal_notice",
+ type=boolean,
+ default=False,
+ location="args",
+ help="Send formal legal notice with appeal information to owner (admin only)",
+ )
+ return parser
+
+
+ def _get_recipients_for_organization(org: Organization) -> list[User]:
+ return [m.user for m in org.by_role("admin")]
+
+
+ def _get_recipients_for_owned_object(obj: Dataset | Reuse | Dataservice) -> list[User]:
+ if obj.owner:
+ return [obj.owner]
+ elif obj.organization:
+ return _get_recipients_for_organization(obj.organization)
+ return []
+
+
+ def send_legal_notice_on_deletion(obj: DeletableObject, args: dict):
+ """Send a formal legal notice email when content is deleted by an admin.
+
+ The email is only sent if:
+ - send_legal_notice=true was passed in args
+ - The current user is a sysadmin
+ """
+ if not args.get("send_legal_notice") or not current_user.sysadmin:
+ return
+
+ if isinstance(obj, Organization):
+ recipients = _get_recipients_for_organization(obj)
+ elif isinstance(obj, User):
+ recipients = [obj]
+ elif isinstance(obj, Discussion):
+ recipients = [obj.user] if obj.user else []
+ elif isinstance(obj, Message):
+ recipients = [obj.posted_by] if obj.posted_by else []
+ else:
+ recipients = _get_recipients_for_owned_object(obj)
+
+ if recipients:
+ _content_deleted(obj.verbose_name).send(recipients)
+
+
+ def _content_deleted(content_type_label: LazyString) -> MailMessage:
+ admin = current_user._get_current_object()
+ terms_of_use_url = current_app.config.get("TERMS_OF_USE_URL")
+ terms_of_use_deletion_article = current_app.config.get("TERMS_OF_USE_DELETION_ARTICLE")
+ telerecours_url = current_app.config.get("TELERECOURS_URL")
+
+ if terms_of_use_url and terms_of_use_deletion_article:
+ terms_paragraph = ParagraphWithLinks(
+ _(
+ 'Our %(terms_link)s specify in point %(article)s that the platform is not "intended '
+ "to disseminate advertising content, promotions of private interests, content contrary "
+ "to public order, illegal content, spam and any contribution violating the applicable "
+ "legal framework. The Editor reserves the right, without prior notice, to remove or "
+ "make inaccessible content published on the Platform that has no connection with its "
+ 'Purpose. The Editor does not carry out "a priori" control over publications. As soon '
+ "as the Editor becomes aware of content contrary to these terms of use, it acts quickly "
+ 'to remove or make it inaccessible".',
+ terms_link=Link(_("terms of use"), terms_of_use_url),
+ article=terms_of_use_deletion_article,
+ )
+ )
+ else:
+ terms_paragraph = _(
+ 'The platform is not "intended to disseminate advertising content, promotions of '
+ "private interests, content contrary to public order, illegal content, spam and any "
+ "contribution violating the applicable legal framework. The Editor reserves the right, "
+ "without prior notice, to remove or make inaccessible content published on the Platform "
+ 'that has no connection with its Purpose. The Editor does not carry out "a priori" '
+ "control over publications. As soon as the Editor becomes aware of content contrary to "
+ 'these terms of use, it acts quickly to remove or make it inaccessible".'
+ )
+
+ if telerecours_url:
+ appeal_paragraph = ParagraphWithLinks(
+ _(
+ "You may contest this decision within two months of its notification by filing "
+ "an administrative appeal (recours gracieux ou hiérarchique). You may also bring "
+ 'the matter before the administrative court via the "%(telerecours_link)s" application.',
+ telerecours_link=Link(_("Télérecours citoyens"), telerecours_url),
+ )
+ )
+ else:
+ appeal_paragraph = _("You may contest this decision by contacting us.")
+
+ paragraphs = [
+ _("Your %(content_type)s has been deleted.", content_type=content_type_label),
+ terms_paragraph,
+ appeal_paragraph,
+ _("Best regards,"),
+ admin.fullname,
+ _("%(site)s team member", site=current_app.config.get("SITE_TITLE", "data.gouv.fr")),
+ ]
+
+ return MailMessage(
+ subject=_("Deletion of your %(content_type)s", content_type=content_type_label),
+ paragraphs=paragraphs,
+ )
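
A hedged client-side sketch of how the new send_legal_notice flag added above could be exercised when a sysadmin deletes a dataset through the API (instance URL, dataset id, and API key are placeholders; the flag is read from the query string and has no effect for non-sysadmins):

import requests

API_URL = "https://www.example.org/api/1"      # placeholder udata instance
headers = {"X-API-KEY": "<sysadmin-api-key>"}  # placeholder credentials

# DELETE /datasets/<id>/?send_legal_notice=true soft-deletes the dataset and,
# for sysadmins, triggers the legal notice email built in udata/core/legal/mails.py.
resp = requests.delete(
    f"{API_URL}/datasets/<dataset-id>/",
    params={"send_legal_notice": "true"},
    headers=headers,
)
assert resp.status_code == 204  # "Dataset deleted"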