udata-8.0.2.dev29304-py2.py3-none-any.whl → udata-9.1.0-py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of udata might be problematic.

Files changed (86)
  1. udata/__init__.py +1 -1
  2. udata/api/__init__.py +2 -0
  3. udata/api/commands.py +0 -2
  4. udata/api_fields.py +41 -3
  5. udata/commands/db.py +88 -48
  6. udata/core/dataservices/factories.py +33 -0
  7. udata/core/dataservices/models.py +42 -4
  8. udata/core/dataservices/rdf.py +106 -0
  9. udata/core/dataset/csv.py +8 -1
  10. udata/core/dataset/models.py +1 -2
  11. udata/core/dataset/rdf.py +37 -128
  12. udata/core/discussions/models.py +20 -0
  13. udata/core/organization/csv.py +5 -3
  14. udata/core/reports/__init__.py +0 -0
  15. udata/core/reports/api.py +44 -0
  16. udata/core/reports/constants.py +30 -0
  17. udata/core/reports/models.py +58 -0
  18. udata/core/reuse/csv.py +3 -0
  19. udata/core/site/api.py +33 -2
  20. udata/core/site/rdf.py +6 -1
  21. udata/core/spam/models.py +6 -0
  22. udata/core/topic/models.py +3 -2
  23. udata/core/topic/parsers.py +3 -2
  24. udata/core/user/apiv2.py +28 -0
  25. udata/db/__init__.py +0 -0
  26. udata/db/tasks.py +6 -0
  27. udata/features/notifications/__init__.py +0 -1
  28. udata/forms/fields.py +2 -2
  29. udata/harvest/api.py +19 -1
  30. udata/harvest/backends/base.py +118 -10
  31. udata/harvest/backends/dcat.py +28 -7
  32. udata/harvest/models.py +6 -0
  33. udata/harvest/tests/dcat/bnodes.xml +13 -2
  34. udata/harvest/tests/test_dcat_backend.py +21 -0
  35. udata/migrations/2024-06-11-fix-reuse-datasets-references.py +35 -0
  36. udata/models/__init__.py +1 -0
  37. udata/rdf.py +113 -2
  38. udata/routing.py +1 -1
  39. udata/settings.py +3 -1
  40. udata/static/admin.js +17 -17
  41. udata/static/admin.js.map +1 -1
  42. udata/static/chunks/{18.ad41fb75ac4226e1f3ce.js → 18.1922fd0b2b7fad122991.js} +3 -3
  43. udata/static/chunks/18.1922fd0b2b7fad122991.js.map +1 -0
  44. udata/static/chunks/{7.11ac4de064ae59691d49.js → 7.e2106342e94ee09393b1.js} +2 -2
  45. udata/static/chunks/7.e2106342e94ee09393b1.js.map +1 -0
  46. udata/static/common.js +1 -1
  47. udata/static/common.js.map +1 -1
  48. udata/storage/s3.py +3 -3
  49. udata/tasks.py +1 -0
  50. udata/tests/api/test_dataservices_api.py +26 -2
  51. udata/tests/api/test_datasets_api.py +1 -1
  52. udata/tests/api/test_reports_api.py +87 -0
  53. udata/tests/apiv2/test_me_api.py +40 -0
  54. udata/tests/dataset/test_dataset_rdf.py +19 -1
  55. udata/tests/frontend/test_auth.py +1 -4
  56. udata/tests/organization/test_csv_adapter.py +0 -1
  57. udata/tests/plugin.py +2 -0
  58. udata/tests/site/test_site_api.py +0 -1
  59. udata/tests/site/test_site_rdf.py +66 -0
  60. udata/tests/test_discussions.py +24 -34
  61. udata/tests/test_model.py +3 -2
  62. udata/tests/test_utils.py +1 -1
  63. udata/translations/ar/LC_MESSAGES/udata.mo +0 -0
  64. udata/translations/ar/LC_MESSAGES/udata.po +128 -64
  65. udata/translations/de/LC_MESSAGES/udata.mo +0 -0
  66. udata/translations/de/LC_MESSAGES/udata.po +128 -64
  67. udata/translations/es/LC_MESSAGES/udata.mo +0 -0
  68. udata/translations/es/LC_MESSAGES/udata.po +128 -64
  69. udata/translations/fr/LC_MESSAGES/udata.mo +0 -0
  70. udata/translations/fr/LC_MESSAGES/udata.po +128 -64
  71. udata/translations/it/LC_MESSAGES/udata.mo +0 -0
  72. udata/translations/it/LC_MESSAGES/udata.po +128 -64
  73. udata/translations/pt/LC_MESSAGES/udata.mo +0 -0
  74. udata/translations/pt/LC_MESSAGES/udata.po +128 -64
  75. udata/translations/sr/LC_MESSAGES/udata.mo +0 -0
  76. udata/translations/sr/LC_MESSAGES/udata.po +128 -64
  77. udata/translations/udata.pot +129 -65
  78. udata/uris.py +14 -13
  79. {udata-8.0.2.dev29304.dist-info → udata-9.1.0.dist-info}/METADATA +26 -7
  80. {udata-8.0.2.dev29304.dist-info → udata-9.1.0.dist-info}/RECORD +84 -72
  81. udata/static/chunks/18.ad41fb75ac4226e1f3ce.js.map +0 -1
  82. udata/static/chunks/7.11ac4de064ae59691d49.js.map +0 -1
  83. {udata-8.0.2.dev29304.dist-info → udata-9.1.0.dist-info}/LICENSE +0 -0
  84. {udata-8.0.2.dev29304.dist-info → udata-9.1.0.dist-info}/WHEEL +0 -0
  85. {udata-8.0.2.dev29304.dist-info → udata-9.1.0.dist-info}/entry_points.txt +0 -0
  86. {udata-8.0.2.dev29304.dist-info → udata-9.1.0.dist-info}/top_level.txt +0 -0
udata/core/dataset/rdf.py CHANGED
@@ -6,8 +6,6 @@ import json
 import logging
 
 from datetime import date
-from html.parser import HTMLParser
-from typing import Optional
 from dateutil.parser import parse as parse_dt
 from flask import current_app
 from geomet import wkt
@@ -18,12 +16,13 @@ from mongoengine.errors import ValidationError
 
 from udata import i18n, uris
 from udata.core.spatial.models import SpatialCoverage
-from udata.frontend.markdown import parse_html
 from udata.core.dataset.models import HarvestDatasetMetadata, HarvestResourceMetadata
-from udata.models import db, ContactPoint
+from udata.harvest.exceptions import HarvestSkipException
+from udata.models import db
 from udata.rdf import (
-    DCAT, DCT, FREQ, SCV, SKOS, SPDX, SCHEMA, EUFREQ, EUFORMAT, IANAFORMAT, VCARD, RDFS,
-    namespace_manager, schema_from_rdf, url_from_rdf
+    DCAT, DCATAP, DCT, FREQ, SCV, SKOS, SPDX, SCHEMA, EUFREQ, EUFORMAT, IANAFORMAT, TAG_TO_EU_HVD_CATEGORIES, RDFS,
+    namespace_manager, rdf_value, remote_url_from_rdf, sanitize_html, schema_from_rdf, themes_from_rdf, url_from_rdf, HVD_LEGISLATION,
+    contact_point_from_rdf,
 )
 from udata.utils import get_by, safe_unicode
 from udata.uris import endpoint_for
@@ -76,43 +75,6 @@ EU_RDF_REQUENCIES = {
     EUFREQ.NEVER: 'punctual',
 }
 
-# Map High Value Datasets URIs to keyword categories
-EU_HVD_CATEGORIES = {
-    "http://data.europa.eu/bna/c_164e0bf5": "Météorologiques",
-    "http://data.europa.eu/bna/c_a9135398": "Entreprises et propriété d'entreprises",
-    "http://data.europa.eu/bna/c_ac64a52d": "Géospatiales",
-    "http://data.europa.eu/bna/c_b79e35eb": "Mobilité",
-    "http://data.europa.eu/bna/c_dd313021": "Observation de la terre et environnement",
-    "http://data.europa.eu/bna/c_e1da4e07": "Statistiques"
-}
-
-
-class HTMLDetector(HTMLParser):
-    def __init__(self, *args, **kwargs):
-        HTMLParser.__init__(self, *args, **kwargs)
-        self.elements = set()
-
-    def handle_starttag(self, tag, attrs):
-        self.elements.add(tag)
-
-    def handle_endtag(self, tag):
-        self.elements.add(tag)
-
-
-def is_html(text):
-    parser = HTMLDetector()
-    parser.feed(text)
-    return bool(parser.elements)
-
-
-def sanitize_html(text):
-    text = text.toPython() if isinstance(text, Literal) else ''
-    if is_html(text):
-        return parse_html(text)
-    else:
-        return text.strip()
-
-
 def temporal_to_rdf(daterange, graph=None):
     if not daterange:
         return
@@ -141,7 +103,7 @@ def owner_to_rdf(dataset, graph=None):
         return
 
 
-def resource_to_rdf(resource, dataset=None, graph=None):
+def resource_to_rdf(resource, dataset=None, graph=None, is_hvd=False):
     '''
     Map a Resource domain model to a DCAT/RDF graph
     '''
@@ -180,22 +142,31 @@ def resource_to_rdf(resource, dataset=None, graph=None, is_hvd=False):
         checksum.add(SPDX.algorithm, getattr(SPDX, algorithm))
         checksum.add(SPDX.checksumValue, Literal(resource.checksum.value))
         r.add(SPDX.checksum, checksum)
+    if is_hvd:
+        # DCAT-AP HVD applicable legislation is also expected at the distribution level
+        r.add(DCATAP.applicableLegislation, URIRef(HVD_LEGISLATION))
     return r
 
 
+def dataset_to_graph_id(dataset: Dataset) -> URIRef | BNode:
+    if dataset.harvest and dataset.harvest.uri:
+        return URIRef(dataset.harvest.uri)
+    elif dataset.id:
+        return URIRef(endpoint_for('datasets.show_redirect', 'api.dataset',
+                                   dataset=dataset.id, _external=True))
+    else:
+        # Should not happen in production. Some tests only
+        # `build()` a dataset without saving it to the DB.
+        return BNode()
+
 def dataset_to_rdf(dataset, graph=None):
     '''
     Map a dataset domain model to a DCAT/RDF graph
     '''
     # Use the unlocalized permalink to the dataset as URI when available
     # unless there is already an upstream URI
-    if dataset.harvest and dataset.harvest.uri:
-        id = URIRef(dataset.harvest.uri)
-    elif dataset.id:
-        id = URIRef(endpoint_for('datasets.show_redirect', 'api.dataset',
-                                 dataset=dataset.id, _external=True))
-    else:
-        id = BNode()
+    id = dataset_to_graph_id(dataset)
+
     # Expose upstream identifier if present
     if dataset.harvest and dataset.harvest.dct_identifier:
         identifier = dataset.harvest.dct_identifier
@@ -214,11 +185,20 @@ def dataset_to_rdf(dataset, graph=None):
     if dataset.acronym:
         d.set(SKOS.altLabel, Literal(dataset.acronym))
 
+    # Add DCAT-AP HVD properties if the dataset is tagged hvd.
+    # See https://semiceu.github.io/DCAT-AP/releases/2.2.0-hvd/
+    is_hvd = current_app.config['HVD_SUPPORT'] and 'hvd' in dataset.tags
+    if is_hvd:
+        d.add(DCATAP.applicableLegislation, URIRef(HVD_LEGISLATION))
+
     for tag in dataset.tags:
         d.add(DCAT.keyword, Literal(tag))
+        # Add HVD category if this dataset is tagged HVD
+        if is_hvd and tag in TAG_TO_EU_HVD_CATEGORIES:
+            d.add(DCATAP.hvdCategory, URIRef(TAG_TO_EU_HVD_CATEGORIES[tag]))
 
     for resource in dataset.resources:
-        d.add(DCAT.distribution, resource_to_rdf(resource, dataset, graph))
+        d.add(DCAT.distribution, resource_to_rdf(resource, dataset, graph, is_hvd))
 
     if dataset.temporal_coverage:
         d.set(DCT.temporal, temporal_to_rdf(dataset.temporal_coverage, graph))
@@ -241,18 +221,6 @@ CHECKSUM_ALGORITHMS = {
 }
 
 
-def serialize_value(value):
-    if isinstance(value, (URIRef, Literal)):
-        return value.toPython()
-    elif isinstance(value, RdfResource):
-        return value.identifier.toPython()
-
-
-def rdf_value(obj, predicate, default=None):
-    value = obj.value(predicate)
-    return serialize_value(value) if value else default
-
-
 def temporal_from_literal(text):
     '''
     Parse a temporal coverage from a literal ie. either:
@@ -327,29 +295,6 @@ def temporal_from_rdf(period_of_time):
         # so we log the error for future investigation and improvement
         log.warning('Unable to parse temporal coverage', exc_info=True)
 
-
-def contact_point_from_rdf(rdf, dataset):
-    contact_point = rdf.value(DCAT.contactPoint)
-    if contact_point:
-        name = rdf_value(contact_point, VCARD.fn) or ''
-        email = (rdf_value(contact_point, VCARD.hasEmail)
-                 or rdf_value(contact_point, VCARD.email)
-                 or rdf_value(contact_point, DCAT.email))
-        if not email:
-            return
-        email = email.replace('mailto:', '').strip()
-        if dataset.organization:
-            contact_point = ContactPoint.objects(
-                name=name, email=email, organization=dataset.organization).first()
-            return (contact_point or
-                    ContactPoint(name=name, email=email, organization=dataset.organization).save())
-        elif dataset.owner:
-            contact_point = ContactPoint.objects(
-                name=name, email=email, owner=dataset.owner).first()
-            return (contact_point or
-                    ContactPoint(name=name, email=email, owner=dataset.owner).save())
-
-
 def spatial_from_rdf(graph):
     geojsons = []
     for term in graph.objects(DCT.spatial):
@@ -489,43 +434,6 @@ def title_from_rdf(rdf, url):
     else:
         return i18n._('Nameless resource')
 
-
-def remote_url_from_rdf(rdf):
-    '''
-    Return DCAT.landingPage if found and uri validation succeeds.
-    Use RDF identifier as fallback if uri validation succeeds.
-    '''
-    landing_page = url_from_rdf(rdf, DCAT.landingPage)
-    uri = rdf.identifier.toPython()
-    for candidate in [landing_page, uri]:
-        if candidate:
-            try:
-                uris.validate(candidate)
-                return candidate
-            except uris.ValidationError:
-                pass
-
-
-def theme_labels_from_rdf(rdf):
-    '''
-    Get theme labels to use as keywords.
-    Map HVD keywords from known URIs resources if HVD support is activated.
-    '''
-    for theme in rdf.objects(DCAT.theme):
-        if isinstance(theme, RdfResource):
-            uri = theme.identifier.toPython()
-            if current_app.config['HVD_SUPPORT'] and uri in EU_HVD_CATEGORIES:
-                label = EU_HVD_CATEGORIES[uri]
-                # Additionnally yield hvd keyword
-                yield 'hvd'
-            else:
-                label = rdf_value(theme, SKOS.prefLabel)
-        else:
-            label = theme.toPython()
-        if label:
-            yield label
-
-
 def resource_from_rdf(graph_or_distrib, dataset=None, is_additionnal=False):
     '''
     Map a Resource domain model to a DCAT/RDF graph
@@ -603,6 +511,9 @@ def dataset_from_rdf(graph: Graph, dataset=None, node=None):
     d = graph.resource(node)
 
     dataset.title = rdf_value(d, DCT.title)
+    if not dataset.title:
+        raise HarvestSkipException("missing title on dataset")
+
     # Support dct:abstract if dct:description is missing (sometimes used instead)
     description = d.value(DCT.description) or d.value(DCT.abstract)
     dataset.description = sanitize_html(description)
@@ -620,9 +531,7 @@ def dataset_from_rdf(graph: Graph, dataset=None, node=None):
     if acronym:
         dataset.acronym = acronym
 
-    tags = [tag.toPython() for tag in d.objects(DCAT.keyword)]
-    tags += theme_labels_from_rdf(d)
-    dataset.tags = list(set(tags))
+    dataset.tags = themes_from_rdf(d)
 
     temporal_coverage = temporal_from_rdf(d.value(DCT.temporal))
     if temporal_coverage:
@@ -675,7 +584,7 @@ def dataset_from_rdf(graph: Graph, dataset=None, node=None):
 
     return dataset
 
-def bbox_to_geojson_multipolygon(bbox_as_str: str) -> Optional[dict]:
+def bbox_to_geojson_multipolygon(bbox_as_str: str) -> dict | None:
     bbox = bbox_as_str.strip().split(',')
     if len(bbox) != 4:
         return None
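
Editor's note: to make the new HVD flow above concrete, here is a minimal standalone rdflib sketch. The DCATAP namespace URI, the HVD_LEGISLATION value and the TAG_TO_EU_HVD_CATEGORIES entry are illustrative stand-ins; the real constants now live in udata.rdf and their exact values are not shown in this diff.

from rdflib import Graph, Literal, Namespace, URIRef

DCAT = Namespace('http://www.w3.org/ns/dcat#')
DCATAP = Namespace('http://data.europa.eu/r5r/')
# Assumed stand-ins for the udata.rdf constants:
HVD_LEGISLATION = 'http://data.europa.eu/eli/reg_impl/2023/138/oj'
TAG_TO_EU_HVD_CATEGORIES = {'hvd-category-mobility': 'http://data.europa.eu/bna/c_b79e35eb'}

g = Graph()
d = URIRef('https://example.org/datasets/demo')
tags = ['hvd', 'hvd-category-mobility']

is_hvd = 'hvd' in tags  # udata additionally checks the HVD_SUPPORT setting
if is_hvd:
    # Applicable legislation goes on the dataset (and on each distribution)
    g.add((d, DCATAP.applicableLegislation, URIRef(HVD_LEGISLATION)))
for tag in tags:
    g.add((d, DCAT.keyword, Literal(tag)))
    if is_hvd and tag in TAG_TO_EU_HVD_CATEGORIES:
        g.add((d, DCATAP.hvdCategory, URIRef(TAG_TO_EU_HVD_CATEGORIES[tag])))
print(g.serialize(format='turtle'))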
udata/core/discussions/models.py CHANGED
@@ -1,6 +1,8 @@
 import logging
 from datetime import datetime
 
+from flask_login import current_user
+
 from udata.mongo import db
 from udata.core.spam.models import SpamMixin, spam_protected
 from .signals import (on_new_discussion, on_discussion_closed, on_new_discussion_comment)
@@ -67,6 +69,24 @@ class Discussion(SpamMixin, db.Document):
     def embeds_to_check_for_spam(self):
         return self.discussion[1:]
 
+    def spam_is_whitelisted(self) -> bool:
+        from udata.core.dataset.permissions import OwnablePermission
+        from udata.core.owned import Owned
+
+        if not current_user or not current_user.is_authenticated:
+            return False
+
+        if not isinstance(self.subject, Owned):
+            return False
+
+        # When creating a new Discussion the `subject` is an empty model
+        # with only `id`. We need to fetch it from the database to have
+        # all the required information
+        if not self.subject.owner or not self.subject.organization:
+            self.subject.reload()
+
+        return OwnablePermission(self.subject).can()
+
     @property
     def external_url(self):
         return self.subject.url_for(
udata/core/organization/csv.py CHANGED
@@ -15,18 +15,20 @@ class OrganizationCsvAdapter(csv.Adapter):
         ('url', 'external_url'),
         'description',
         ('logo', lambda o: o.logo(external=True)),
-        ('badges', lambda o: [badge.kind for badge in o.badges]),
+        ('badges', lambda o: ','.join([badge.kind for badge in o.badges])),
         'created_at',
         'last_modified',
+        'business_number_id',
+        ('members_count', lambda o: len(o.members)),
     )
 
     def dynamic_fields(self):
         return csv.metric_fields(Organization) + self.get_dynamic_field_downloads()
-
+
     def get_dynamic_field_downloads(self):
         downloads_counts = self.get_downloads_counts()
         return [('downloads', lambda o: downloads_counts.get(str(o.id), 0))]
-
+
     def get_downloads_counts(self):
         '''
         Prefetch all the resources' downloads for all selected organization into memory
udata/core/reports/__init__.py ADDED
File without changes
udata/core/reports/api.py ADDED
@@ -0,0 +1,44 @@
+import mongoengine
+from flask import request
+from flask_login import current_user
+
+from udata.api import API, api, fields
+from udata.api_fields import patch
+
+from .constants import reports_reasons_translations
+from .models import Report
+
+ns = api.namespace('reports', 'User reported objects related operations (beta)')
+
+@ns.route('/', endpoint='reports')
+class ReportsAPI(API):
+    @api.doc('list_reports')
+    @api.expect(Report.__index_parser__)
+    @api.marshal_with(Report.__page_fields__)
+    def get(self):
+        query = Report.objects
+
+        return Report.apply_sort_filters_and_pagination(query)
+
+    @api.secure
+    @api.doc('create_report', responses={400: 'Validation error'})
+    @api.expect(Report.__write_fields__)
+    @api.marshal_with(Report.__read_fields__, code=201)
+    def post(self):
+        report = patch(Report(), request)
+        report.by = current_user._get_current_object()
+
+        try:
+            report.save()
+        except mongoengine.errors.ValidationError as e:
+            api.abort(400, e.message)
+
+        return report, 201
+
+
+@ns.route('/reasons/', endpoint='reports_reasons')
+class ReportsReasonsAPI(API):
+    @api.doc('list_reports_reasons')
+    @ns.response(200, "list of available reasons associated with their labels", fields.Raw)
+    def get(self):
+        return reports_reasons_translations()
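
Editor's note: a hypothetical client call, assuming udata's conventional /api/1 prefix and X-API-KEY authentication (neither is shown in this diff; host, key and ObjectId below are placeholders):

import requests

resp = requests.post(
    'https://demo.data.gouv.fr/api/1/reports/',
    headers={'X-API-KEY': 'your-api-key'},  # placeholder credential; POST is @api.secure
    json={
        'object_type': 'Dataset',                  # one of REPORTABLE_MODELS
        'object_id': '5de8f787634f41e4b1b54dad',   # example ObjectId
        'reason': 'spam',                          # one of REPORT_REASONS_CHOICES
        'message': 'Optional free-text context',
    },
)
print(resp.status_code)  # 201 on success, 400 on validation error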
udata/core/reports/constants.py ADDED
@@ -0,0 +1,30 @@
+from udata.core.dataset.models import Dataset
+from udata.i18n import lazy_gettext as _
+
+REASON_PERSONAL_DATA = "personal_data"
+REASON_EXPLICIT_CONTENT = "explicit_content"
+REASON_ILLEGAL_CONTENT = "illegal_content"
+REASON_OTHERS = "others"
+REASON_SECURITY = "security"
+REASON_SPAM = "spam"
+
+
+def reports_reasons_translations() -> list:
+    """
+    This is a function to avoid creating the list with a wrong lang
+    at the start of the app.
+    """
+    return [
+        {"value": REASON_EXPLICIT_CONTENT, "label": _("Explicit content")},
+        {"value": REASON_ILLEGAL_CONTENT, "label": _("Illegal content")},
+        {"value": REASON_OTHERS, "label": _("Others")},
+        {"value": REASON_PERSONAL_DATA, "label": _("Personal data")},
+        {"value": REASON_SECURITY, "label": _("Security")},
+        {"value": REASON_SPAM, "label": _("Spam")},
+    ]
+
+
+REPORT_REASONS_CHOICES: list[str] = [
+    item["value"] for item in reports_reasons_translations()
+]
+REPORTABLE_MODELS = [Dataset]
udata/core/reports/models.py ADDED
@@ -0,0 +1,58 @@
+from datetime import datetime
+
+from mongoengine import NULLIFY, signals
+
+from udata.api_fields import field, generate_fields
+from udata.core.user.api_fields import user_ref_fields
+from udata.core.user.models import User
+from udata.mongo import db
+
+from .constants import REPORT_REASONS_CHOICES, REPORTABLE_MODELS
+
+
+@generate_fields()
+class Report(db.Document):
+    by = field(
+        db.ReferenceField(User, reverse_delete_rule=NULLIFY),
+        nested_fields=user_ref_fields,
+        description="Only set if a user was connected when reporting an element.",
+        readonly=True,
+        allow_null=True,
+    )
+
+    object_type = field(
+        db.StringField(choices=[m.__name__ for m in REPORTABLE_MODELS])
+    )
+    object_id = field(
+        db.ObjectIdField()
+    )
+    object_deleted_at = field(
+        db.DateTimeField(),
+        allow_null=True,
+        readonly=True,
+    )
+
+    reason = field(
+        db.StringField(choices=REPORT_REASONS_CHOICES, required=True),
+    )
+    message = field(
+        db.StringField(),
+    )
+
+    reported_at = field(
+        db.DateTimeField(default=datetime.utcnow, required=True),
+        readonly=True,
+    )
+
+    @classmethod
+    def mark_as_deleted_soft_delete(cls, sender, document, **kwargs):
+        if document.deleted:
+            Report.objects(object_type=sender.__name__, object_id=document.id, object_deleted_at=None).update(object_deleted_at=datetime.utcnow)
+
+    def mark_as_deleted_hard_delete(cls, document, **kwargs):
+        Report.objects(object_type=document.__class__.__name__, object_id=document.id, object_deleted_at=None).update(object_deleted_at=datetime.utcnow)
+
+
+for model in REPORTABLE_MODELS:
+    signals.post_save.connect(Report.mark_as_deleted_soft_delete, sender=model)
+    signals.post_delete.connect(Report.mark_as_deleted_hard_delete, sender=model)
udata/core/reuse/csv.py CHANGED
@@ -15,10 +15,13 @@ class ReuseCsvAdapter(csv.Adapter):
         ('remote_url', 'url'),
         ('organization', 'organization.name'),
         ('organization_id', 'organization.id'),
+        ('owner', 'owner.slug'),  # in case it's owned by a user
+        ('owner_id', 'owner.id'),
         ('image', lambda r: r.image(external=True)),
         ('featured', lambda r: r.featured or False),
         'created_at',
         'last_modified',
+        'topic',
         ('tags', lambda r: ','.join(r.tags)),
         ('datasets', lambda r: ','.join([str(d.id) for d in r.datasets])),
     )
udata/core/site/api.py CHANGED
@@ -1,9 +1,11 @@
 from bson import ObjectId
 
 from flask import request, redirect, url_for, json, make_response
+from mongoengine import Q
 
 from udata.api import api, API, fields
 from udata.auth import admin_permission
+from udata.core.dataservices.models import Dataservice
 from udata.models import Dataset, Reuse
 from udata.utils import multi_to_dict
 from udata.rdf import (
@@ -105,8 +107,37 @@ class SiteRdfCatalogFormat(API):
         params = multi_to_dict(request.args)
         page = int(params.get('page', 1))
         page_size = int(params.get('page_size', 100))
-        datasets = Dataset.objects.visible().paginate(page, page_size)
-        catalog = build_catalog(current_site, datasets, format=format)
+        datasets = Dataset.objects.visible()
+        if 'tag' in params:
+            datasets = datasets.filter(tags=params.get('tag', ''))
+        datasets = datasets.paginate(page, page_size)
+
+        # We need to add Dataservice to the catalog.
+        # In the best world, we want:
+        # - Keep the correct number of datasets on the page (if the requested page size is 100, we should have 100 datasets)
+        # - Have simple MongoDB queries
+        # - Do not duplicate the datasets (each dataset is present once in the catalog)
+        # - Do not duplicate the dataservices (each dataservice is present once in the catalog)
+        # - Every referenced dataset for one dataservices present on the page (hard to do)
+        #
+        # Multiple solutions are possible but none check all the constraints.
+        # The selected one is to put all the dataservices referencing at least one of the dataset on
+        # the page at the end of it. It means dataservices could be duplicated (present on multiple pages)
+        # and these dataservices may referenced some datasets not present in the current page. It's working
+        # if somebody is doing the same thing as us (keeping the list of all the datasets IDs for the entire catalog then
+        # listing all dataservices in a second pass)
+        # Another option is to do some tricky Mongo requests to order/group datasets by their presence in some dataservices but
+        # it could be really hard to do with a n..n relation.
+        # Let's keep this solution simple right now and iterate on it in the future.
+        dataservices_filter = Q(datasets__in=[d.id for d in datasets])
+
+        # On the first page, add all dataservices without datasets
+        if page == 1:
+            dataservices_filter = dataservices_filter | Q(datasets__size=0)
+
+        dataservices = Dataservice.objects.visible().filter(dataservices_filter)
+
+        catalog = build_catalog(current_site, datasets, dataservices=dataservices, format=format)
         # bypass flask-restplus make_response, since graph_response
         # is handling the content negociation directly
         return make_response(*graph_response(catalog, format))
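
Editor's note: a condensed sketch of the page-assembly strategy described in the comment above, using plain mongoengine with simplified stand-in models instead of udata's paginate helper and visible() querysets:

from mongoengine import Document, ListField, Q, ReferenceField, StringField, connect

connect('example')  # illustrative connection

class Dataset(Document):
    title = StringField()

class Dataservice(Document):
    title = StringField()
    datasets = ListField(ReferenceField(Dataset))

def catalog_page(page: int, page_size: int):
    datasets = list(Dataset.objects.skip((page - 1) * page_size).limit(page_size))
    # Append every dataservice referencing at least one dataset of this page...
    ds_filter = Q(datasets__in=[d.id for d in datasets])
    if page == 1:
        # ...and, on the first page only, dataservices with no datasets at all
        ds_filter |= Q(datasets__size=0)
    return datasets, list(Dataservice.objects(ds_filter))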
udata/core/site/rdf.py CHANGED
@@ -5,6 +5,7 @@ from flask import url_for, current_app
 from rdflib import Graph, URIRef, Literal, BNode
 from rdflib.namespace import RDF, FOAF
 
+from udata.core.dataservices.rdf import dataservice_to_rdf
 from udata.core.dataset.rdf import dataset_to_rdf
 from udata.core.organization.rdf import organization_to_rdf
 from udata.core.user.rdf import user_to_rdf
@@ -13,7 +14,7 @@ from udata.utils import Paginable
 from udata.uris import endpoint_for
 
 
-def build_catalog(site, datasets, format=None):
+def build_catalog(site, datasets, dataservices=[], format=None):
     '''Build the DCAT catalog for this site'''
     site_url = endpoint_for('site.home_redirect', 'api.site', _external=True)
     catalog_url = url_for('api.site_rdf_catalog', _external=True)
@@ -40,6 +41,10 @@ def build_catalog(site, datasets, dataservices=[], format=None):
         rdf_dataset.add(DCT.publisher, organization_to_rdf(dataset.organization, graph))
         catalog.add(DCAT.dataset, rdf_dataset)
 
+    for dataservice in dataservices:
+        rdf_dataservice = dataservice_to_rdf(dataservice, graph)
+        catalog.add(DCAT.DataService, rdf_dataservice)
+
     if isinstance(datasets, Paginable):
         paginate_catalog(catalog, graph, datasets, format, 'api.site_rdf_catalog_format')
udata/core/spam/models.py CHANGED
@@ -67,6 +67,9 @@ class SpamMixin(object):
         if not self.spam:
             self.spam = SpamInfo(status=NOT_CHECKED, callbacks={})
 
+        if self.spam_is_whitelisted():
+            return
+
         # The breadcrumb is useful during reporting to know where we came from
         # in case of a potential spam inside an embed.
         if breadcrumb is None:
@@ -139,6 +142,9 @@ class SpamMixin(object):
     def embeds_to_check_for_spam(self):
         return []
 
+    def spam_is_whitelisted(self) -> bool:
+        return False
+
     def spam_report_message(self):
         return f"Spam potentiel sur {type(self).__name__}"
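Editor's note: the hook pattern in isolation, as a stripped-down sketch (names are illustrative, not udata code). SpamMixin now consults spam_is_whitelisted() before running any check, and Discussion overrides it so content authored by someone allowed to administer the subject is never flagged:

class SpamMixinSketch:
    def spam_is_whitelisted(self) -> bool:
        return False  # default: nothing is whitelisted

    def check_for_spam(self, text: str) -> bool:
        if self.spam_is_whitelisted():
            return False  # trusted author: skip detection entirely
        return 'free money' in text.lower()  # placeholder heuristic


class DiscussionSketch(SpamMixinSketch):
    def __init__(self, author_can_edit_subject: bool):
        self.author_can_edit_subject = author_can_edit_subject

    def spam_is_whitelisted(self) -> bool:
        # Stand-in for the real OwnablePermission(self.subject).can() check
        return self.author_can_edit_subject


assert DiscussionSketch(True).check_for_spam('free money!!!') is False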
udata/core/topic/models.py CHANGED
@@ -3,7 +3,7 @@ from mongoengine.signals import pre_save
 from udata.models import db, SpatialCoverage
 from udata.search import reindex
 from udata.tasks import as_task_param
-from udata.core.owned import Owned
+from udata.core.owned import Owned, OwnedQuerySet
 
 
 __all__ = ('Topic', )
@@ -36,7 +36,8 @@ class Topic(db.Document, Owned, db.Datetimed):
             'slug'
         ] + Owned.meta['indexes'],
         'ordering': ['-created_at'],
-        'auto_create_index_on_save': True
+        'auto_create_index_on_save': True,
+        'queryset_class': OwnedQuerySet,
     }
 
     def __str__(self):
udata/core/topic/parsers.py CHANGED
@@ -11,10 +11,11 @@ class TopicApiParser(ModelApiParser):
         'last_modified': 'last_modified',
     }
 
-    def __init__(self):
+    def __init__(self, with_include_private=True):
         super().__init__()
+        if with_include_private:
+            self.parser.add_argument('include_private', type=bool, location='args')
         self.parser.add_argument('tag', type=str, location='args')
-        self.parser.add_argument('include_private', type=bool, location='args')
         self.parser.add_argument('geozone', type=str, location='args')
         self.parser.add_argument('granularity', type=str, location='args')
         self.parser.add_argument('organization', type=str, location='args')
udata/core/user/apiv2.py ADDED
@@ -0,0 +1,28 @@
+from flask_security import current_user
+
+from udata.api import apiv2, API
+from udata.core.topic.apiv2 import topic_page_fields
+from udata.core.topic.parsers import TopicApiParser
+from udata.models import Topic
+
+me = apiv2.namespace('me', 'Connected user related operations (v2)')
+
+# we will force include_private to True, no need for this arg
+topic_parser = TopicApiParser(with_include_private=False)
+
+
+@me.route('/org_topics/', endpoint='my_org_topics')
+class MyOrgTopicsAPI(API):
+    @apiv2.secure
+    @apiv2.doc('my_org_topics')
+    @apiv2.expect(topic_parser.parser)
+    @apiv2.marshal_list_with(topic_page_fields)
+    def get(self):
+        '''List all topics related to me and my organizations.'''
+        args = topic_parser.parse()
+        args["include_private"] = True
+        owners = list(current_user.organizations) + [current_user.id]
+        topics = Topic.objects.owned_by(*owners)
+        topics = topic_parser.parse_filters(topics, args)
+        sort = args['sort'] or ('$text_score' if args['q'] else None) or '-last-modified'
+        return topics.order_by(sort).paginate(args['page'], args['page_size'])
udata/db/__init__.py ADDED
File without changes
udata/db/tasks.py ADDED
@@ -0,0 +1,6 @@
+from udata.commands.db import check_references
+from udata.tasks import job
+
+@job('check-integrity')
+def check_integrity(self):
+    check_references()
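
Editor's note: if udata's standard job CLI applies here (an assumption; this diff only registers the task), the new integrity check could presumably be triggered like so:

# From the command line (assumed udata job CLI):
#   udata job run check-integrity
# Or from Python, by delaying the Celery task:
from udata.db.tasks import check_integrity
check_integrity.delay()  # runs check_references() asynchronously on a worker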
udata/features/notifications/__init__.py CHANGED
@@ -1,4 +1,3 @@
-from importlib import import_module
 
 import logging
 
udata/forms/fields.py CHANGED
@@ -206,8 +206,8 @@ class URLField(EmptyNone, Field):
         if self.data:
             try:
                 uris.validate(self.data)
-            except uris.ValidationError:
-                raise validators.ValidationError(_('Invalid URL'))
+            except uris.ValidationError as e:
+                raise validators.ValidationError(str(e))
         return True
 
     def process_formdata(self, valuelist):