udata 8.0.2.dev29304__py2.py3-none-any.whl → 9.1.0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of udata might be problematic. Click here for more details.
- udata/__init__.py +1 -1
- udata/api/__init__.py +2 -0
- udata/api/commands.py +0 -2
- udata/api_fields.py +41 -3
- udata/commands/db.py +88 -48
- udata/core/dataservices/factories.py +33 -0
- udata/core/dataservices/models.py +42 -4
- udata/core/dataservices/rdf.py +106 -0
- udata/core/dataset/csv.py +8 -1
- udata/core/dataset/models.py +1 -2
- udata/core/dataset/rdf.py +37 -128
- udata/core/discussions/models.py +20 -0
- udata/core/organization/csv.py +5 -3
- udata/core/reports/__init__.py +0 -0
- udata/core/reports/api.py +44 -0
- udata/core/reports/constants.py +30 -0
- udata/core/reports/models.py +58 -0
- udata/core/reuse/csv.py +3 -0
- udata/core/site/api.py +33 -2
- udata/core/site/rdf.py +6 -1
- udata/core/spam/models.py +6 -0
- udata/core/topic/models.py +3 -2
- udata/core/topic/parsers.py +3 -2
- udata/core/user/apiv2.py +28 -0
- udata/db/__init__.py +0 -0
- udata/db/tasks.py +6 -0
- udata/features/notifications/__init__.py +0 -1
- udata/forms/fields.py +2 -2
- udata/harvest/api.py +19 -1
- udata/harvest/backends/base.py +118 -10
- udata/harvest/backends/dcat.py +28 -7
- udata/harvest/models.py +6 -0
- udata/harvest/tests/dcat/bnodes.xml +13 -2
- udata/harvest/tests/test_dcat_backend.py +21 -0
- udata/migrations/2024-06-11-fix-reuse-datasets-references.py +35 -0
- udata/models/__init__.py +1 -0
- udata/rdf.py +113 -2
- udata/routing.py +1 -1
- udata/settings.py +3 -1
- udata/static/admin.js +17 -17
- udata/static/admin.js.map +1 -1
- udata/static/chunks/{18.ad41fb75ac4226e1f3ce.js → 18.1922fd0b2b7fad122991.js} +3 -3
- udata/static/chunks/18.1922fd0b2b7fad122991.js.map +1 -0
- udata/static/chunks/{7.11ac4de064ae59691d49.js → 7.e2106342e94ee09393b1.js} +2 -2
- udata/static/chunks/7.e2106342e94ee09393b1.js.map +1 -0
- udata/static/common.js +1 -1
- udata/static/common.js.map +1 -1
- udata/storage/s3.py +3 -3
- udata/tasks.py +1 -0
- udata/tests/api/test_dataservices_api.py +26 -2
- udata/tests/api/test_datasets_api.py +1 -1
- udata/tests/api/test_reports_api.py +87 -0
- udata/tests/apiv2/test_me_api.py +40 -0
- udata/tests/dataset/test_dataset_rdf.py +19 -1
- udata/tests/frontend/test_auth.py +1 -4
- udata/tests/organization/test_csv_adapter.py +0 -1
- udata/tests/plugin.py +2 -0
- udata/tests/site/test_site_api.py +0 -1
- udata/tests/site/test_site_rdf.py +66 -0
- udata/tests/test_discussions.py +24 -34
- udata/tests/test_model.py +3 -2
- udata/tests/test_utils.py +1 -1
- udata/translations/ar/LC_MESSAGES/udata.mo +0 -0
- udata/translations/ar/LC_MESSAGES/udata.po +128 -64
- udata/translations/de/LC_MESSAGES/udata.mo +0 -0
- udata/translations/de/LC_MESSAGES/udata.po +128 -64
- udata/translations/es/LC_MESSAGES/udata.mo +0 -0
- udata/translations/es/LC_MESSAGES/udata.po +128 -64
- udata/translations/fr/LC_MESSAGES/udata.mo +0 -0
- udata/translations/fr/LC_MESSAGES/udata.po +128 -64
- udata/translations/it/LC_MESSAGES/udata.mo +0 -0
- udata/translations/it/LC_MESSAGES/udata.po +128 -64
- udata/translations/pt/LC_MESSAGES/udata.mo +0 -0
- udata/translations/pt/LC_MESSAGES/udata.po +128 -64
- udata/translations/sr/LC_MESSAGES/udata.mo +0 -0
- udata/translations/sr/LC_MESSAGES/udata.po +128 -64
- udata/translations/udata.pot +129 -65
- udata/uris.py +14 -13
- {udata-8.0.2.dev29304.dist-info → udata-9.1.0.dist-info}/METADATA +26 -7
- {udata-8.0.2.dev29304.dist-info → udata-9.1.0.dist-info}/RECORD +84 -72
- udata/static/chunks/18.ad41fb75ac4226e1f3ce.js.map +0 -1
- udata/static/chunks/7.11ac4de064ae59691d49.js.map +0 -1
- {udata-8.0.2.dev29304.dist-info → udata-9.1.0.dist-info}/LICENSE +0 -0
- {udata-8.0.2.dev29304.dist-info → udata-9.1.0.dist-info}/WHEEL +0 -0
- {udata-8.0.2.dev29304.dist-info → udata-9.1.0.dist-info}/entry_points.txt +0 -0
- {udata-8.0.2.dev29304.dist-info → udata-9.1.0.dist-info}/top_level.txt +0 -0
udata/core/dataset/rdf.py
CHANGED
|
@@ -6,8 +6,6 @@ import json
|
|
|
6
6
|
import logging
|
|
7
7
|
|
|
8
8
|
from datetime import date
|
|
9
|
-
from html.parser import HTMLParser
|
|
10
|
-
from typing import Optional
|
|
11
9
|
from dateutil.parser import parse as parse_dt
|
|
12
10
|
from flask import current_app
|
|
13
11
|
from geomet import wkt
|
|
@@ -18,12 +16,13 @@ from mongoengine.errors import ValidationError
|
|
|
18
16
|
|
|
19
17
|
from udata import i18n, uris
|
|
20
18
|
from udata.core.spatial.models import SpatialCoverage
|
|
21
|
-
from udata.frontend.markdown import parse_html
|
|
22
19
|
from udata.core.dataset.models import HarvestDatasetMetadata, HarvestResourceMetadata
|
|
23
|
-
from udata.
|
|
20
|
+
from udata.harvest.exceptions import HarvestSkipException
|
|
21
|
+
from udata.models import db
|
|
24
22
|
from udata.rdf import (
|
|
25
|
-
DCAT, DCT, FREQ, SCV, SKOS, SPDX, SCHEMA, EUFREQ, EUFORMAT, IANAFORMAT,
|
|
26
|
-
namespace_manager, schema_from_rdf, url_from_rdf
|
|
23
|
+
DCAT, DCATAP, DCT, FREQ, SCV, SKOS, SPDX, SCHEMA, EUFREQ, EUFORMAT, IANAFORMAT, TAG_TO_EU_HVD_CATEGORIES, RDFS,
|
|
24
|
+
namespace_manager, rdf_value, remote_url_from_rdf, sanitize_html, schema_from_rdf, themes_from_rdf, url_from_rdf, HVD_LEGISLATION,
|
|
25
|
+
contact_point_from_rdf,
|
|
27
26
|
)
|
|
28
27
|
from udata.utils import get_by, safe_unicode
|
|
29
28
|
from udata.uris import endpoint_for
|
|
@@ -76,43 +75,6 @@ EU_RDF_REQUENCIES = {
|
|
|
76
75
|
EUFREQ.NEVER: 'punctual',
|
|
77
76
|
}
|
|
78
77
|
|
|
79
|
-
# Map High Value Datasets URIs to keyword categories
|
|
80
|
-
EU_HVD_CATEGORIES = {
|
|
81
|
-
"http://data.europa.eu/bna/c_164e0bf5": "Météorologiques",
|
|
82
|
-
"http://data.europa.eu/bna/c_a9135398": "Entreprises et propriété d'entreprises",
|
|
83
|
-
"http://data.europa.eu/bna/c_ac64a52d": "Géospatiales",
|
|
84
|
-
"http://data.europa.eu/bna/c_b79e35eb": "Mobilité",
|
|
85
|
-
"http://data.europa.eu/bna/c_dd313021": "Observation de la terre et environnement",
|
|
86
|
-
"http://data.europa.eu/bna/c_e1da4e07": "Statistiques"
|
|
87
|
-
}
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
class HTMLDetector(HTMLParser):
|
|
91
|
-
def __init__(self, *args, **kwargs):
|
|
92
|
-
HTMLParser.__init__(self, *args, **kwargs)
|
|
93
|
-
self.elements = set()
|
|
94
|
-
|
|
95
|
-
def handle_starttag(self, tag, attrs):
|
|
96
|
-
self.elements.add(tag)
|
|
97
|
-
|
|
98
|
-
def handle_endtag(self, tag):
|
|
99
|
-
self.elements.add(tag)
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
def is_html(text):
|
|
103
|
-
parser = HTMLDetector()
|
|
104
|
-
parser.feed(text)
|
|
105
|
-
return bool(parser.elements)
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
def sanitize_html(text):
|
|
109
|
-
text = text.toPython() if isinstance(text, Literal) else ''
|
|
110
|
-
if is_html(text):
|
|
111
|
-
return parse_html(text)
|
|
112
|
-
else:
|
|
113
|
-
return text.strip()
|
|
114
|
-
|
|
115
|
-
|
|
116
78
|
def temporal_to_rdf(daterange, graph=None):
|
|
117
79
|
if not daterange:
|
|
118
80
|
return
|
|
@@ -141,7 +103,7 @@ def owner_to_rdf(dataset, graph=None):
|
|
|
141
103
|
return
|
|
142
104
|
|
|
143
105
|
|
|
144
|
-
def resource_to_rdf(resource, dataset=None, graph=None):
|
|
106
|
+
def resource_to_rdf(resource, dataset=None, graph=None, is_hvd=False):
|
|
145
107
|
'''
|
|
146
108
|
Map a Resource domain model to a DCAT/RDF graph
|
|
147
109
|
'''
|
|
@@ -180,22 +142,31 @@ def resource_to_rdf(resource, dataset=None, graph=None):
|
|
|
180
142
|
checksum.add(SPDX.algorithm, getattr(SPDX, algorithm))
|
|
181
143
|
checksum.add(SPDX.checksumValue, Literal(resource.checksum.value))
|
|
182
144
|
r.add(SPDX.checksum, checksum)
|
|
145
|
+
if is_hvd:
|
|
146
|
+
# DCAT-AP HVD applicable legislation is also expected at the distribution level
|
|
147
|
+
r.add(DCATAP.applicableLegislation, URIRef(HVD_LEGISLATION))
|
|
183
148
|
return r
|
|
184
149
|
|
|
185
150
|
|
|
151
|
+
def dataset_to_graph_id(dataset: Dataset) -> URIRef | BNode:
    '''
    Pick the RDF node used to identify `dataset` in a graph.

    Resolution order:
    1. the upstream (harvested) URI when the dataset has one;
    2. the unlocalized permalink built from the dataset id;
    3. a blank node when the dataset has no id yet.
    '''
    harvest = dataset.harvest
    if harvest and harvest.uri:
        return URIRef(harvest.uri)

    if not dataset.id:
        # Should not happen in production. Some tests only
        # `build()` a dataset without saving it to the DB.
        return BNode()

    permalink = endpoint_for('datasets.show_redirect', 'api.dataset',
                             dataset=dataset.id, _external=True)
    return URIRef(permalink)
|
|
161
|
+
|
|
186
162
|
def dataset_to_rdf(dataset, graph=None):
|
|
187
163
|
'''
|
|
188
164
|
Map a dataset domain model to a DCAT/RDF graph
|
|
189
165
|
'''
|
|
190
166
|
# Use the unlocalized permalink to the dataset as URI when available
|
|
191
167
|
# unless there is already an upstream URI
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
elif dataset.id:
|
|
195
|
-
id = URIRef(endpoint_for('datasets.show_redirect', 'api.dataset',
|
|
196
|
-
dataset=dataset.id, _external=True))
|
|
197
|
-
else:
|
|
198
|
-
id = BNode()
|
|
168
|
+
id = dataset_to_graph_id(dataset)
|
|
169
|
+
|
|
199
170
|
# Expose upstream identifier if present
|
|
200
171
|
if dataset.harvest and dataset.harvest.dct_identifier:
|
|
201
172
|
identifier = dataset.harvest.dct_identifier
|
|
@@ -214,11 +185,20 @@ def dataset_to_rdf(dataset, graph=None):
|
|
|
214
185
|
if dataset.acronym:
|
|
215
186
|
d.set(SKOS.altLabel, Literal(dataset.acronym))
|
|
216
187
|
|
|
188
|
+
# Add DCAT-AP HVD properties if the dataset is tagged hvd.
|
|
189
|
+
# See https://semiceu.github.io/DCAT-AP/releases/2.2.0-hvd/
|
|
190
|
+
is_hvd = current_app.config['HVD_SUPPORT'] and 'hvd' in dataset.tags
|
|
191
|
+
if is_hvd:
|
|
192
|
+
d.add(DCATAP.applicableLegislation, URIRef(HVD_LEGISLATION))
|
|
193
|
+
|
|
217
194
|
for tag in dataset.tags:
|
|
218
195
|
d.add(DCAT.keyword, Literal(tag))
|
|
196
|
+
# Add HVD category if this dataset is tagged HVD
|
|
197
|
+
if is_hvd and tag in TAG_TO_EU_HVD_CATEGORIES:
|
|
198
|
+
d.add(DCATAP.hvdCategory, URIRef(TAG_TO_EU_HVD_CATEGORIES[tag]))
|
|
219
199
|
|
|
220
200
|
for resource in dataset.resources:
|
|
221
|
-
d.add(DCAT.distribution, resource_to_rdf(resource, dataset, graph))
|
|
201
|
+
d.add(DCAT.distribution, resource_to_rdf(resource, dataset, graph, is_hvd))
|
|
222
202
|
|
|
223
203
|
if dataset.temporal_coverage:
|
|
224
204
|
d.set(DCT.temporal, temporal_to_rdf(dataset.temporal_coverage, graph))
|
|
@@ -241,18 +221,6 @@ CHECKSUM_ALGORITHMS = {
|
|
|
241
221
|
}
|
|
242
222
|
|
|
243
223
|
|
|
244
|
-
def serialize_value(value):
|
|
245
|
-
if isinstance(value, (URIRef, Literal)):
|
|
246
|
-
return value.toPython()
|
|
247
|
-
elif isinstance(value, RdfResource):
|
|
248
|
-
return value.identifier.toPython()
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
def rdf_value(obj, predicate, default=None):
|
|
252
|
-
value = obj.value(predicate)
|
|
253
|
-
return serialize_value(value) if value else default
|
|
254
|
-
|
|
255
|
-
|
|
256
224
|
def temporal_from_literal(text):
|
|
257
225
|
'''
|
|
258
226
|
Parse a temporal coverage from a literal ie. either:
|
|
@@ -327,29 +295,6 @@ def temporal_from_rdf(period_of_time):
|
|
|
327
295
|
# so we log the error for future investigation and improvement
|
|
328
296
|
log.warning('Unable to parse temporal coverage', exc_info=True)
|
|
329
297
|
|
|
330
|
-
|
|
331
|
-
def contact_point_from_rdf(rdf, dataset):
|
|
332
|
-
contact_point = rdf.value(DCAT.contactPoint)
|
|
333
|
-
if contact_point:
|
|
334
|
-
name = rdf_value(contact_point, VCARD.fn) or ''
|
|
335
|
-
email = (rdf_value(contact_point, VCARD.hasEmail)
|
|
336
|
-
or rdf_value(contact_point, VCARD.email)
|
|
337
|
-
or rdf_value(contact_point, DCAT.email))
|
|
338
|
-
if not email:
|
|
339
|
-
return
|
|
340
|
-
email = email.replace('mailto:', '').strip()
|
|
341
|
-
if dataset.organization:
|
|
342
|
-
contact_point = ContactPoint.objects(
|
|
343
|
-
name=name, email=email, organization=dataset.organization).first()
|
|
344
|
-
return (contact_point or
|
|
345
|
-
ContactPoint(name=name, email=email, organization=dataset.organization).save())
|
|
346
|
-
elif dataset.owner:
|
|
347
|
-
contact_point = ContactPoint.objects(
|
|
348
|
-
name=name, email=email, owner=dataset.owner).first()
|
|
349
|
-
return (contact_point or
|
|
350
|
-
ContactPoint(name=name, email=email, owner=dataset.owner).save())
|
|
351
|
-
|
|
352
|
-
|
|
353
298
|
def spatial_from_rdf(graph):
|
|
354
299
|
geojsons = []
|
|
355
300
|
for term in graph.objects(DCT.spatial):
|
|
@@ -489,43 +434,6 @@ def title_from_rdf(rdf, url):
|
|
|
489
434
|
else:
|
|
490
435
|
return i18n._('Nameless resource')
|
|
491
436
|
|
|
492
|
-
|
|
493
|
-
def remote_url_from_rdf(rdf):
|
|
494
|
-
'''
|
|
495
|
-
Return DCAT.landingPage if found and uri validation succeeds.
|
|
496
|
-
Use RDF identifier as fallback if uri validation succeeds.
|
|
497
|
-
'''
|
|
498
|
-
landing_page = url_from_rdf(rdf, DCAT.landingPage)
|
|
499
|
-
uri = rdf.identifier.toPython()
|
|
500
|
-
for candidate in [landing_page, uri]:
|
|
501
|
-
if candidate:
|
|
502
|
-
try:
|
|
503
|
-
uris.validate(candidate)
|
|
504
|
-
return candidate
|
|
505
|
-
except uris.ValidationError:
|
|
506
|
-
pass
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
def theme_labels_from_rdf(rdf):
|
|
510
|
-
'''
|
|
511
|
-
Get theme labels to use as keywords.
|
|
512
|
-
Map HVD keywords from known URIs resources if HVD support is activated.
|
|
513
|
-
'''
|
|
514
|
-
for theme in rdf.objects(DCAT.theme):
|
|
515
|
-
if isinstance(theme, RdfResource):
|
|
516
|
-
uri = theme.identifier.toPython()
|
|
517
|
-
if current_app.config['HVD_SUPPORT'] and uri in EU_HVD_CATEGORIES:
|
|
518
|
-
label = EU_HVD_CATEGORIES[uri]
|
|
519
|
-
# Additionnally yield hvd keyword
|
|
520
|
-
yield 'hvd'
|
|
521
|
-
else:
|
|
522
|
-
label = rdf_value(theme, SKOS.prefLabel)
|
|
523
|
-
else:
|
|
524
|
-
label = theme.toPython()
|
|
525
|
-
if label:
|
|
526
|
-
yield label
|
|
527
|
-
|
|
528
|
-
|
|
529
437
|
def resource_from_rdf(graph_or_distrib, dataset=None, is_additionnal=False):
|
|
530
438
|
'''
|
|
531
439
|
Map a Resource domain model to a DCAT/RDF graph
|
|
@@ -603,6 +511,9 @@ def dataset_from_rdf(graph: Graph, dataset=None, node=None):
|
|
|
603
511
|
d = graph.resource(node)
|
|
604
512
|
|
|
605
513
|
dataset.title = rdf_value(d, DCT.title)
|
|
514
|
+
if not dataset.title:
|
|
515
|
+
raise HarvestSkipException("missing title on dataset")
|
|
516
|
+
|
|
606
517
|
# Support dct:abstract if dct:description is missing (sometimes used instead)
|
|
607
518
|
description = d.value(DCT.description) or d.value(DCT.abstract)
|
|
608
519
|
dataset.description = sanitize_html(description)
|
|
@@ -620,9 +531,7 @@ def dataset_from_rdf(graph: Graph, dataset=None, node=None):
|
|
|
620
531
|
if acronym:
|
|
621
532
|
dataset.acronym = acronym
|
|
622
533
|
|
|
623
|
-
tags =
|
|
624
|
-
tags += theme_labels_from_rdf(d)
|
|
625
|
-
dataset.tags = list(set(tags))
|
|
534
|
+
dataset.tags = themes_from_rdf(d)
|
|
626
535
|
|
|
627
536
|
temporal_coverage = temporal_from_rdf(d.value(DCT.temporal))
|
|
628
537
|
if temporal_coverage:
|
|
@@ -675,7 +584,7 @@ def dataset_from_rdf(graph: Graph, dataset=None, node=None):
|
|
|
675
584
|
|
|
676
585
|
return dataset
|
|
677
586
|
|
|
678
|
-
def bbox_to_geojson_multipolygon(bbox_as_str: str) ->
|
|
587
|
+
def bbox_to_geojson_multipolygon(bbox_as_str: str) -> dict | None:
|
|
679
588
|
bbox = bbox_as_str.strip().split(',')
|
|
680
589
|
if len(bbox) != 4:
|
|
681
590
|
return None
|
udata/core/discussions/models.py
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from datetime import datetime
|
|
3
3
|
|
|
4
|
+
from flask_login import current_user
|
|
5
|
+
|
|
4
6
|
from udata.mongo import db
|
|
5
7
|
from udata.core.spam.models import SpamMixin, spam_protected
|
|
6
8
|
from .signals import (on_new_discussion, on_discussion_closed, on_new_discussion_comment)
|
|
@@ -67,6 +69,24 @@ class Discussion(SpamMixin, db.Document):
|
|
|
67
69
|
def embeds_to_check_for_spam(self):
|
|
68
70
|
return self.discussion[1:]
|
|
69
71
|
|
|
72
|
+
def spam_is_whitelisted(self) -> bool:
    '''
    Tell whether this discussion should bypass the spam check.

    Returns `False` when nobody is authenticated or when the subject is
    not an `Owned` model; otherwise returns whatever
    `OwnablePermission(self.subject).can()` answers for the current user.
    '''
    # Function-level imports, kept as in the original
    # (presumably to avoid an import cycle — TODO confirm).
    from udata.core.dataset.permissions import OwnablePermission
    from udata.core.owned import Owned

    if not (current_user and current_user.is_authenticated):
        return False

    if not isinstance(self.subject, Owned):
        return False

    # When creating a new Discussion the `subject` is an empty model
    # with only `id`. We need to fetch it from the database to have
    # all the required information
    if not (self.subject.owner and self.subject.organization):
        self.subject.reload()

    permission = OwnablePermission(self.subject)
    return permission.can()
|
|
89
|
+
|
|
70
90
|
@property
|
|
71
91
|
def external_url(self):
|
|
72
92
|
return self.subject.url_for(
|
udata/core/organization/csv.py
CHANGED
|
@@ -15,18 +15,20 @@ class OrganizationCsvAdapter(csv.Adapter):
|
|
|
15
15
|
('url', 'external_url'),
|
|
16
16
|
'description',
|
|
17
17
|
('logo', lambda o: o.logo(external=True)),
|
|
18
|
-
('badges', lambda o: [badge.kind for badge in o.badges]),
|
|
18
|
+
('badges', lambda o: ','.join([badge.kind for badge in o.badges])),
|
|
19
19
|
'created_at',
|
|
20
20
|
'last_modified',
|
|
21
|
+
'business_number_id',
|
|
22
|
+
('members_count', lambda o: len(o.members)),
|
|
21
23
|
)
|
|
22
24
|
|
|
23
25
|
def dynamic_fields(self):
|
|
24
26
|
return csv.metric_fields(Organization) + self.get_dynamic_field_downloads()
|
|
25
|
-
|
|
27
|
+
|
|
26
28
|
def get_dynamic_field_downloads(self):
|
|
27
29
|
downloads_counts = self.get_downloads_counts()
|
|
28
30
|
return [('downloads', lambda o: downloads_counts.get(str(o.id), 0))]
|
|
29
|
-
|
|
31
|
+
|
|
30
32
|
def get_downloads_counts(self):
|
|
31
33
|
'''
|
|
32
34
|
Prefetch all the resources' downloads for all selected organization into memory
|
|
File without changes
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
import mongoengine
|
|
2
|
+
from flask import request
|
|
3
|
+
from flask_login import current_user
|
|
4
|
+
|
|
5
|
+
from udata.api import API, api, fields
|
|
6
|
+
from udata.api_fields import patch
|
|
7
|
+
|
|
8
|
+
from .constants import reports_reasons_translations
|
|
9
|
+
from .models import Report
|
|
10
|
+
|
|
11
|
+
ns = api.namespace('reports', 'User reported objects related operations (beta)')
|
|
12
|
+
|
|
13
|
+
@ns.route('/', endpoint='reports')
class ReportsAPI(API):
    # NOTE: plain comments are used instead of docstrings on purpose, so the
    # generated API documentation stays exactly as it was.

    @api.doc('list_reports')
    @api.expect(Report.__index_parser__)
    @api.marshal_with(Report.__page_fields__)
    def get(self):
        # Sorting, filtering and pagination are delegated to the model helper.
        return Report.apply_sort_filters_and_pagination(Report.objects)

    @api.secure
    @api.doc('create_report', responses={400: 'Validation error'})
    @api.expect(Report.__write_fields__)
    @api.marshal_with(Report.__read_fields__, code=201)
    def post(self):
        # Build the report from the request payload, then attach the author.
        new_report = patch(Report(), request)
        # Unwrap the `current_user` proxy to store the underlying user object.
        new_report.by = current_user._get_current_object()

        try:
            new_report.save()
        except mongoengine.errors.ValidationError as error:
            # Model validation failures are reported to the client as a 400.
            api.abort(400, error.message)

        return new_report, 201
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@ns.route('/reasons/', endpoint='reports_reasons')
class ReportsReasonsAPI(API):
    @api.doc('list_reports_reasons')
    @ns.response(200, "list of available reasons associated with their labels", fields.Raw)
    def get(self):
        # The list is rebuilt on every call rather than cached at import time.
        reasons = reports_reasons_translations()
        return reasons
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
from udata.core.dataset.models import Dataset
|
|
2
|
+
from udata.i18n import lazy_gettext as _
|
|
3
|
+
|
|
4
|
+
REASON_PERSONAL_DATA = "personal_data"
|
|
5
|
+
REASON_EXPLICIT_CONTENT = "explicit_content"
|
|
6
|
+
REASON_ILLEGAL_CONTENT = "illegal_content"
|
|
7
|
+
REASON_OTHERS = "others"
|
|
8
|
+
REASON_SECURITY = "security"
|
|
9
|
+
REASON_SPAM = "spam"
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def reports_reasons_translations() -> list:
    """
    Build the list of report reasons, each with its translatable label.

    This is a function to avoid creating the list with a wrong lang
    at the start of the app.

    Returns a list of `{"value": ..., "label": ...}` dicts, in a fixed order.
    """
    labelled_reasons = (
        (REASON_EXPLICIT_CONTENT, _("Explicit content")),
        (REASON_ILLEGAL_CONTENT, _("Illegal content")),
        (REASON_OTHERS, _("Others")),
        (REASON_PERSONAL_DATA, _("Personal data")),
        (REASON_SECURITY, _("Security")),
        (REASON_SPAM, _("Spam")),
    )
    return [
        {"value": value, "label": label}
        for value, label in labelled_reasons
    ]
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
REPORT_REASONS_CHOICES: list[str] = [
|
|
28
|
+
item["value"] for item in reports_reasons_translations()
|
|
29
|
+
]
|
|
30
|
+
REPORTABLE_MODELS = [Dataset]
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
from datetime import datetime
|
|
2
|
+
|
|
3
|
+
from mongoengine import NULLIFY, signals
|
|
4
|
+
|
|
5
|
+
from udata.api_fields import field, generate_fields
|
|
6
|
+
from udata.core.user.api_fields import user_ref_fields
|
|
7
|
+
from udata.core.user.models import User
|
|
8
|
+
from udata.mongo import db
|
|
9
|
+
|
|
10
|
+
from .constants import REPORT_REASONS_CHOICES, REPORTABLE_MODELS
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@generate_fields()
class Report(db.Document):
    # User who filed the report; nullified if that user is deleted.
    by = field(
        db.ReferenceField(User, reverse_delete_rule=NULLIFY),
        nested_fields=user_ref_fields,
        description="Only set if a user was connected when reporting an element.",
        readonly=True,
        allow_null=True,
    )

    # The reported object is stored as a class name (one of REPORTABLE_MODELS)
    # plus its id.
    object_type = field(
        db.StringField(choices=[m.__name__ for m in REPORTABLE_MODELS])
    )
    object_id = field(
        db.ObjectIdField()
    )
    # Filled in by the signal handlers below once the reported object is deleted.
    object_deleted_at = field(
        db.DateTimeField(),
        allow_null=True,
        readonly=True,
    )

    reason = field(
        db.StringField(choices=REPORT_REASONS_CHOICES, required=True),
    )
    message = field(
        db.StringField(),
    )

    reported_at = field(
        db.DateTimeField(default=datetime.utcnow, required=True),
        readonly=True,
    )

    @classmethod
    def _mark_object_as_deleted(cls, object_type, object_id):
        '''
        Stamp `object_deleted_at` on every report targeting the given object
        that has not been stamped yet.
        '''
        # Pass a concrete timestamp instead of the `datetime.utcnow` callable,
        # so the stored value does not depend on the field calling it for us.
        Report.objects(
            object_type=object_type,
            object_id=object_id,
            object_deleted_at=None,
        ).update(object_deleted_at=datetime.utcnow())

    @classmethod
    def mark_as_deleted_soft_delete(cls, sender, document, **kwargs):
        '''
        `post_save` handler: flag the reports of `document` when it has been
        soft-deleted (its `deleted` attribute is set).
        '''
        if document.deleted:
            cls._mark_object_as_deleted(sender.__name__, document.id)

    @classmethod
    def mark_as_deleted_hard_delete(cls, sender, document, **kwargs):
        '''
        `post_delete` handler: flag the reports of `document` once it has been
        removed.

        Now a real classmethod with the same `(cls, sender, document)` shape
        as the soft-delete handler. Previously the decorator was missing, so
        the parameter named `cls` actually received the signal sender.
        '''
        cls._mark_object_as_deleted(document.__class__.__name__, document.id)


# Keep reports in sync with the lifecycle of every reportable model.
for model in REPORTABLE_MODELS:
    signals.post_save.connect(Report.mark_as_deleted_soft_delete, sender=model)
    signals.post_delete.connect(Report.mark_as_deleted_hard_delete, sender=model)
|
udata/core/reuse/csv.py
CHANGED
|
@@ -15,10 +15,13 @@ class ReuseCsvAdapter(csv.Adapter):
|
|
|
15
15
|
('remote_url', 'url'),
|
|
16
16
|
('organization', 'organization.name'),
|
|
17
17
|
('organization_id', 'organization.id'),
|
|
18
|
+
('owner', 'owner.slug'), # in case it's owned by a user
|
|
19
|
+
('owner_id', 'owner.id'),
|
|
18
20
|
('image', lambda r: r.image(external=True)),
|
|
19
21
|
('featured', lambda r: r.featured or False),
|
|
20
22
|
'created_at',
|
|
21
23
|
'last_modified',
|
|
24
|
+
'topic',
|
|
22
25
|
('tags', lambda r: ','.join(r.tags)),
|
|
23
26
|
('datasets', lambda r: ','.join([str(d.id) for d in r.datasets])),
|
|
24
27
|
)
|
udata/core/site/api.py
CHANGED
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
from bson import ObjectId
|
|
2
2
|
|
|
3
3
|
from flask import request, redirect, url_for, json, make_response
|
|
4
|
+
from mongoengine import Q
|
|
4
5
|
|
|
5
6
|
from udata.api import api, API, fields
|
|
6
7
|
from udata.auth import admin_permission
|
|
8
|
+
from udata.core.dataservices.models import Dataservice
|
|
7
9
|
from udata.models import Dataset, Reuse
|
|
8
10
|
from udata.utils import multi_to_dict
|
|
9
11
|
from udata.rdf import (
|
|
@@ -105,8 +107,37 @@ class SiteRdfCatalogFormat(API):
|
|
|
105
107
|
params = multi_to_dict(request.args)
|
|
106
108
|
page = int(params.get('page', 1))
|
|
107
109
|
page_size = int(params.get('page_size', 100))
|
|
108
|
-
datasets = Dataset.objects.visible()
|
|
109
|
-
|
|
110
|
+
datasets = Dataset.objects.visible()
|
|
111
|
+
if 'tag' in params:
|
|
112
|
+
datasets = datasets.filter(tags=params.get('tag', ''))
|
|
113
|
+
datasets = datasets.paginate(page, page_size)
|
|
114
|
+
|
|
115
|
+
# We need to add Dataservice to the catalog.
|
|
116
|
+
# In the best world, we want:
|
|
117
|
+
# - Keep the correct number of datasets on the page (if the requested page size is 100, we should have 100 datasets)
|
|
118
|
+
# - Have simple MongoDB queries
|
|
119
|
+
# - Do not duplicate the datasets (each dataset is present once in the catalog)
|
|
120
|
+
# - Do not duplicate the dataservices (each dataservice is present once in the catalog)
|
|
121
|
+
# - Every dataset referenced by a dataservice on the page is also present on that page (hard to do)
|
|
122
|
+
#
|
|
123
|
+
# Multiple solutions are possible but none check all the constraints.
|
|
124
|
+
# The selected one is to put all the dataservices referencing at least one of the dataset on
|
|
125
|
+
# the page at the end of it. It means dataservices could be duplicated (present on multiple pages)
|
|
126
|
+
# and these dataservices may reference some datasets not present in the current page. It works
|
|
127
|
+
# if somebody is doing the same thing as us (keeping the list of all the datasets IDs for the entire catalog then
|
|
128
|
+
# listing all dataservices in a second pass)
|
|
129
|
+
# Another option is to do some tricky Mongo requests to order/group datasets by their presence in some dataservices but
|
|
130
|
+
# it could be really hard to do with a n..n relation.
|
|
131
|
+
# Let's keep this solution simple right now and iterate on it in the future.
|
|
132
|
+
dataservices_filter = Q(datasets__in=[d.id for d in datasets])
|
|
133
|
+
|
|
134
|
+
# On the first page, add all dataservices without datasets
|
|
135
|
+
if page == 1:
|
|
136
|
+
dataservices_filter = dataservices_filter | Q(datasets__size=0)
|
|
137
|
+
|
|
138
|
+
dataservices = Dataservice.objects.visible().filter(dataservices_filter)
|
|
139
|
+
|
|
140
|
+
catalog = build_catalog(current_site, datasets, dataservices=dataservices, format=format)
|
|
110
141
|
# bypass flask-restplus make_response, since graph_response
|
|
111
142
|
# is handling the content negotiation directly
|
|
112
143
|
return make_response(*graph_response(catalog, format))
|
udata/core/site/rdf.py
CHANGED
|
@@ -5,6 +5,7 @@ from flask import url_for, current_app
|
|
|
5
5
|
from rdflib import Graph, URIRef, Literal, BNode
|
|
6
6
|
from rdflib.namespace import RDF, FOAF
|
|
7
7
|
|
|
8
|
+
from udata.core.dataservices.rdf import dataservice_to_rdf
|
|
8
9
|
from udata.core.dataset.rdf import dataset_to_rdf
|
|
9
10
|
from udata.core.organization.rdf import organization_to_rdf
|
|
10
11
|
from udata.core.user.rdf import user_to_rdf
|
|
@@ -13,7 +14,7 @@ from udata.utils import Paginable
|
|
|
13
14
|
from udata.uris import endpoint_for
|
|
14
15
|
|
|
15
16
|
|
|
16
|
-
def build_catalog(site, datasets, format=None):
|
|
17
|
+
def build_catalog(site, datasets, dataservices = [], format=None):
|
|
17
18
|
'''Build the DCAT catalog for this site'''
|
|
18
19
|
site_url = endpoint_for('site.home_redirect', 'api.site', _external=True)
|
|
19
20
|
catalog_url = url_for('api.site_rdf_catalog', _external=True)
|
|
@@ -40,6 +41,10 @@ def build_catalog(site, datasets, format=None):
|
|
|
40
41
|
rdf_dataset.add(DCT.publisher, organization_to_rdf(dataset.organization, graph))
|
|
41
42
|
catalog.add(DCAT.dataset, rdf_dataset)
|
|
42
43
|
|
|
44
|
+
for dataservice in dataservices:
|
|
45
|
+
rdf_dataservice = dataservice_to_rdf(dataservice, graph)
|
|
46
|
+
catalog.add(DCAT.DataService, rdf_dataservice)
|
|
47
|
+
|
|
43
48
|
if isinstance(datasets, Paginable):
|
|
44
49
|
paginate_catalog(catalog, graph, datasets, format, 'api.site_rdf_catalog_format')
|
|
45
50
|
|
udata/core/spam/models.py
CHANGED
|
@@ -67,6 +67,9 @@ class SpamMixin(object):
|
|
|
67
67
|
if not self.spam:
|
|
68
68
|
self.spam = SpamInfo(status=NOT_CHECKED, callbacks={})
|
|
69
69
|
|
|
70
|
+
if self.spam_is_whitelisted():
|
|
71
|
+
return
|
|
72
|
+
|
|
70
73
|
# The breadcrumb is useful during reporting to know where we came from
|
|
71
74
|
# in case of a potential spam inside an embed.
|
|
72
75
|
if breadcrumb is None:
|
|
@@ -139,6 +142,9 @@ class SpamMixin(object):
|
|
|
139
142
|
def embeds_to_check_for_spam(self):
|
|
140
143
|
return []
|
|
141
144
|
|
|
145
|
+
def spam_is_whitelisted(self) -> bool:
    '''
    Tell whether this object is exempt from the spam check.

    The default answer is always `False`; a model using this mixin can
    override the method to whitelist some of its instances.
    '''
    return False
|
|
147
|
+
|
|
142
148
|
def spam_report_message(self):
|
|
143
149
|
return f"Spam potentiel sur {type(self).__name__}"
|
|
144
150
|
|
udata/core/topic/models.py
CHANGED
|
@@ -3,7 +3,7 @@ from mongoengine.signals import pre_save
|
|
|
3
3
|
from udata.models import db, SpatialCoverage
|
|
4
4
|
from udata.search import reindex
|
|
5
5
|
from udata.tasks import as_task_param
|
|
6
|
-
from udata.core.owned import Owned
|
|
6
|
+
from udata.core.owned import Owned, OwnedQuerySet
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
__all__ = ('Topic', )
|
|
@@ -36,7 +36,8 @@ class Topic(db.Document, Owned, db.Datetimed):
|
|
|
36
36
|
'slug'
|
|
37
37
|
] + Owned.meta['indexes'],
|
|
38
38
|
'ordering': ['-created_at'],
|
|
39
|
-
'auto_create_index_on_save': True
|
|
39
|
+
'auto_create_index_on_save': True,
|
|
40
|
+
'queryset_class': OwnedQuerySet,
|
|
40
41
|
}
|
|
41
42
|
|
|
42
43
|
def __str__(self):
|
udata/core/topic/parsers.py
CHANGED
|
@@ -11,10 +11,11 @@ class TopicApiParser(ModelApiParser):
|
|
|
11
11
|
'last_modified': 'last_modified',
|
|
12
12
|
}
|
|
13
13
|
|
|
14
|
-
def __init__(self):
|
|
14
|
+
def __init__(self, with_include_private=True):
|
|
15
15
|
super().__init__()
|
|
16
|
+
if with_include_private:
|
|
17
|
+
self.parser.add_argument('include_private', type=bool, location='args')
|
|
16
18
|
self.parser.add_argument('tag', type=str, location='args')
|
|
17
|
-
self.parser.add_argument('include_private', type=bool, location='args')
|
|
18
19
|
self.parser.add_argument('geozone', type=str, location='args')
|
|
19
20
|
self.parser.add_argument('granularity', type=str, location='args')
|
|
20
21
|
self.parser.add_argument('organization', type=str, location='args')
|
udata/core/user/apiv2.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
from flask_security import current_user
|
|
2
|
+
|
|
3
|
+
from udata.api import apiv2, API
|
|
4
|
+
from udata.core.topic.apiv2 import topic_page_fields
|
|
5
|
+
from udata.core.topic.parsers import TopicApiParser
|
|
6
|
+
from udata.models import Topic
|
|
7
|
+
|
|
8
|
+
me = apiv2.namespace('me', 'Connected user related operations (v2)')
|
|
9
|
+
|
|
10
|
+
# we will force include_private to True, no need for this arg
|
|
11
|
+
topic_parser = TopicApiParser(with_include_private=False)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@me.route('/org_topics/', endpoint='my_org_topics')
class MyOrgTopicsAPI(API):
    @apiv2.secure
    @apiv2.doc('my_org_topics')
    @apiv2.expect(topic_parser.parser)
    @apiv2.marshal_list_with(topic_page_fields)
    def get(self):
        '''List all topics related to me and my organizations.'''
        args = topic_parser.parse()
        # The parser was built without `include_private`; force it on so the
        # filters are applied as if the caller had asked for private topics.
        args["include_private"] = True
        # Topics owned either by one of my organizations or by me directly.
        owners = list(current_user.organizations) + [current_user.id]
        topics = Topic.objects.owned_by(*owners)
        topics = topic_parser.parse_filters(topics, args)
        # Sort priority: explicit `sort` argument, then text relevance when a
        # query is given, then most recently modified first.
        # The fallback used to be '-last-modified', which is not a field name
        # (the field is `last_modified`), so the default ordering had no effect.
        sort = args['sort'] or ('$text_score' if args['q'] else None) or '-last_modified'
        return topics.order_by(sort).paginate(args['page'], args['page_size'])
|
udata/db/__init__.py
ADDED
|
File without changes
|
udata/db/tasks.py
ADDED
udata/forms/fields.py
CHANGED
|
@@ -206,8 +206,8 @@ class URLField(EmptyNone, Field):
|
|
|
206
206
|
if self.data:
|
|
207
207
|
try:
|
|
208
208
|
uris.validate(self.data)
|
|
209
|
-
except uris.ValidationError:
|
|
210
|
-
raise validators.ValidationError(
|
|
209
|
+
except uris.ValidationError as e:
|
|
210
|
+
raise validators.ValidationError(str(e))
|
|
211
211
|
return True
|
|
212
212
|
|
|
213
213
|
def process_formdata(self, valuelist):
|