udata 8.0.2.dev29284__py2.py3-none-any.whl → 8.0.2.dev29339__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of udata might be problematic. Click here for more details.
- udata/core/dataset/rdf.py +56 -14
- udata/core/site/api.py +4 -1
- udata/harvest/tests/dcat/bnodes.xml +10 -1
- udata/harvest/tests/test_dcat_backend.py +1 -1
- udata/rdf.py +4 -0
- udata/static/chunks/{11.7266fef2dddc1db403d9.js → 11.ae54612e36c6d46f85db.js} +3 -3
- udata/static/chunks/{11.7266fef2dddc1db403d9.js.map → 11.ae54612e36c6d46f85db.js.map} +1 -1
- udata/static/chunks/{13.91b177d7d531fd55cf5d.js → 13.d8ccb992a49875966313.js} +2 -2
- udata/static/chunks/{13.91b177d7d531fd55cf5d.js.map → 13.d8ccb992a49875966313.js.map} +1 -1
- udata/static/chunks/{16.e866757bab9f6b0a3f1b.js → 16.4565605e68bab129a471.js} +2 -2
- udata/static/chunks/{16.e866757bab9f6b0a3f1b.js.map → 16.4565605e68bab129a471.js.map} +1 -1
- udata/static/chunks/{19.619b83ac597516dcd03e.js → 19.f993a75d5bfe2382548d.js} +3 -3
- udata/static/chunks/{19.619b83ac597516dcd03e.js.map → 19.f993a75d5bfe2382548d.js.map} +1 -1
- udata/static/chunks/{5.48417db6b33328fa9d6a.js → 5.cc2e7bf65ef32f9c8604.js} +3 -3
- udata/static/chunks/{5.48417db6b33328fa9d6a.js.map → 5.cc2e7bf65ef32f9c8604.js.map} +1 -1
- udata/static/chunks/{6.f84539bd4c419b36cc19.js → 6.cad898a38692eda28965.js} +3 -3
- udata/static/chunks/{6.f84539bd4c419b36cc19.js.map → 6.cad898a38692eda28965.js.map} +1 -1
- udata/static/chunks/{9.07503e7f7ec02919f696.js → 9.d5b992e9ef51921aeb57.js} +2 -2
- udata/static/chunks/{9.07503e7f7ec02919f696.js.map → 9.d5b992e9ef51921aeb57.js.map} +1 -1
- udata/static/common.js +1 -1
- udata/static/common.js.map +1 -1
- udata/tests/dataset/test_dataset_rdf.py +17 -2
- udata/tests/site/test_site_rdf.py +16 -0
- {udata-8.0.2.dev29284.dist-info → udata-8.0.2.dev29339.dist-info}/METADATA +3 -1
- {udata-8.0.2.dev29284.dist-info → udata-8.0.2.dev29339.dist-info}/RECORD +29 -29
- {udata-8.0.2.dev29284.dist-info → udata-8.0.2.dev29339.dist-info}/LICENSE +0 -0
- {udata-8.0.2.dev29284.dist-info → udata-8.0.2.dev29339.dist-info}/WHEEL +0 -0
- {udata-8.0.2.dev29284.dist-info → udata-8.0.2.dev29339.dist-info}/entry_points.txt +0 -0
- {udata-8.0.2.dev29284.dist-info → udata-8.0.2.dev29339.dist-info}/top_level.txt +0 -0
udata/core/dataset/rdf.py
CHANGED
|
@@ -22,9 +22,10 @@ from udata.frontend.markdown import parse_html
|
|
|
22
22
|
from udata.core.dataset.models import HarvestDatasetMetadata, HarvestResourceMetadata
|
|
23
23
|
from udata.models import db, ContactPoint
|
|
24
24
|
from udata.rdf import (
|
|
25
|
-
DCAT, DCT, FREQ, SCV, SKOS, SPDX, SCHEMA, EUFREQ, EUFORMAT, IANAFORMAT, VCARD, RDFS,
|
|
26
|
-
namespace_manager, schema_from_rdf, url_from_rdf
|
|
25
|
+
DCAT, DCATAP, DCT, FREQ, SCV, SKOS, SPDX, SCHEMA, EUFREQ, EUFORMAT, IANAFORMAT, VCARD, RDFS,
|
|
26
|
+
HVD_LEGISLATION, namespace_manager, schema_from_rdf, url_from_rdf
|
|
27
27
|
)
|
|
28
|
+
from udata.tags import slug as slugify_tag
|
|
28
29
|
from udata.utils import get_by, safe_unicode
|
|
29
30
|
from udata.uris import endpoint_for
|
|
30
31
|
|
|
@@ -85,6 +86,7 @@ EU_HVD_CATEGORIES = {
|
|
|
85
86
|
"http://data.europa.eu/bna/c_dd313021": "Observation de la terre et environnement",
|
|
86
87
|
"http://data.europa.eu/bna/c_e1da4e07": "Statistiques"
|
|
87
88
|
}
|
|
89
|
+
TAG_TO_EU_HVD_CATEGORIES = {slugify_tag(EU_HVD_CATEGORIES[uri]): uri for uri in EU_HVD_CATEGORIES}
|
|
88
90
|
|
|
89
91
|
|
|
90
92
|
class HTMLDetector(HTMLParser):
|
|
@@ -141,7 +143,7 @@ def owner_to_rdf(dataset, graph=None):
|
|
|
141
143
|
return
|
|
142
144
|
|
|
143
145
|
|
|
144
|
-
def resource_to_rdf(resource, dataset=None, graph=None):
|
|
146
|
+
def resource_to_rdf(resource, dataset=None, graph=None, is_hvd=False):
|
|
145
147
|
'''
|
|
146
148
|
Map a Resource domain model to a DCAT/RDF graph
|
|
147
149
|
'''
|
|
@@ -180,6 +182,9 @@ def resource_to_rdf(resource, dataset=None, graph=None):
|
|
|
180
182
|
checksum.add(SPDX.algorithm, getattr(SPDX, algorithm))
|
|
181
183
|
checksum.add(SPDX.checksumValue, Literal(resource.checksum.value))
|
|
182
184
|
r.add(SPDX.checksum, checksum)
|
|
185
|
+
if is_hvd:
|
|
186
|
+
# DCAT-AP HVD applicable legislation is also expected at the distribution level
|
|
187
|
+
r.add(DCATAP.applicableLegislation, URIRef(HVD_LEGISLATION))
|
|
183
188
|
return r
|
|
184
189
|
|
|
185
190
|
|
|
@@ -214,11 +219,20 @@ def dataset_to_rdf(dataset, graph=None):
|
|
|
214
219
|
if dataset.acronym:
|
|
215
220
|
d.set(SKOS.altLabel, Literal(dataset.acronym))
|
|
216
221
|
|
|
222
|
+
# Add DCAT-AP HVD properties if the dataset is tagged hvd.
|
|
223
|
+
# See https://semiceu.github.io/DCAT-AP/releases/2.2.0-hvd/
|
|
224
|
+
is_hvd = current_app.config['HVD_SUPPORT'] and 'hvd' in dataset.tags
|
|
225
|
+
if is_hvd:
|
|
226
|
+
d.add(DCATAP.applicableLegislation, URIRef(HVD_LEGISLATION))
|
|
227
|
+
|
|
217
228
|
for tag in dataset.tags:
|
|
218
229
|
d.add(DCAT.keyword, Literal(tag))
|
|
230
|
+
# Add HVD category if this dataset is tagged HVD
|
|
231
|
+
if is_hvd and tag in TAG_TO_EU_HVD_CATEGORIES:
|
|
232
|
+
d.add(DCATAP.hvdCategory, URIRef(TAG_TO_EU_HVD_CATEGORIES[tag]))
|
|
219
233
|
|
|
220
234
|
for resource in dataset.resources:
|
|
221
|
-
d.add(DCAT.distribution, resource_to_rdf(resource, dataset, graph))
|
|
235
|
+
d.add(DCAT.distribution, resource_to_rdf(resource, dataset, graph, is_hvd))
|
|
222
236
|
|
|
223
237
|
if dataset.temporal_coverage:
|
|
224
238
|
d.set(DCT.temporal, temporal_to_rdf(dataset.temporal_coverage, graph))
|
|
@@ -381,23 +395,51 @@ def spatial_from_rdf(graph):
|
|
|
381
395
|
else:
|
|
382
396
|
continue
|
|
383
397
|
|
|
384
|
-
if geojson['type'] == 'Polygon':
|
|
385
|
-
geojson['type'] = 'MultiPolygon'
|
|
386
|
-
geojson['coordinates'] = [geojson['coordinates']]
|
|
387
|
-
|
|
388
398
|
geojsons.append(geojson)
|
|
389
399
|
except Exception as e:
|
|
390
400
|
log.exception(f"Exception during `spatial_from_rdf` for term {term}: {e}", stack_info=True)
|
|
391
401
|
|
|
402
|
+
if not geojsons:
|
|
403
|
+
return None
|
|
404
|
+
|
|
405
|
+
# We first try to build a big MultiPolygon with all the spatial coverages found in RDF.
|
|
406
|
+
# We deduplicate the coordinates because some backends provide the same coordinates multiple
|
|
407
|
+
# times in different formats. In this first pass we only support Polygons and MultiPolygons. Not sure
|
|
408
|
+
# if there are other types of spatial coverage worth integrating (points? line strings?). But these other
|
|
409
|
+
# formats cannot be merged into the single representation stored in MongoDB; we'll deal with them in a second pass.
|
|
410
|
+
# The merge loses the properties and other information inside the GeoJSON…
|
|
411
|
+
# Note that having multiple `Polygon` is not really the DCAT way of doing things, the standard requires that you use
|
|
412
|
+
# a `MultiPolygon` in this case. We support this right now, and wait and see if it raises problems in the future for
|
|
413
|
+
# people following the standard. (see https://github.com/datagouv/data.gouv.fr/issues/1362#issuecomment-2112774115)
|
|
414
|
+
polygons = []
|
|
392
415
|
for geojson in geojsons:
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
416
|
+
if geojson['type'] == 'Polygon':
|
|
417
|
+
if geojson['coordinates'] not in polygons:
|
|
418
|
+
polygons.append(geojson['coordinates'])
|
|
419
|
+
elif geojson['type'] == 'MultiPolygon':
|
|
420
|
+
for coordinates in geojson['coordinates']:
|
|
421
|
+
if coordinates not in polygons:
|
|
422
|
+
polygons.append(coordinates)
|
|
423
|
+
else:
|
|
424
|
+
log.warning(f"Unsupported GeoJSON type '{geojson['type']}'")
|
|
398
425
|
continue
|
|
399
426
|
|
|
400
|
-
|
|
427
|
+
if not polygons:
|
|
428
|
+
log.warning(f"No supported types found in the GeoJSON data.")
|
|
429
|
+
return None
|
|
430
|
+
|
|
431
|
+
spatial_coverage = SpatialCoverage(geom={
|
|
432
|
+
'type': 'MultiPolygon',
|
|
433
|
+
'coordinates': polygons,
|
|
434
|
+
})
|
|
435
|
+
|
|
436
|
+
try:
|
|
437
|
+
spatial_coverage.clean()
|
|
438
|
+
return spatial_coverage
|
|
439
|
+
except ValidationError as e:
|
|
440
|
+
log.warning(f"Cannot save the spatial coverage {coordinates} (error was {e})")
|
|
441
|
+
return None
|
|
442
|
+
|
|
401
443
|
|
|
402
444
|
def frequency_from_rdf(term):
|
|
403
445
|
if isinstance(term, str):
|
udata/core/site/api.py
CHANGED
|
@@ -105,7 +105,10 @@ class SiteRdfCatalogFormat(API):
|
|
|
105
105
|
params = multi_to_dict(request.args)
|
|
106
106
|
page = int(params.get('page', 1))
|
|
107
107
|
page_size = int(params.get('page_size', 100))
|
|
108
|
-
datasets = Dataset.objects.visible()
|
|
108
|
+
datasets = Dataset.objects.visible()
|
|
109
|
+
if 'tag' in params:
|
|
110
|
+
datasets = datasets.filter(tags=params.get('tag', ''))
|
|
111
|
+
datasets = datasets.paginate(page, page_size)
|
|
109
112
|
catalog = build_catalog(current_site, datasets, format=format)
|
|
110
113
|
# bypass flask-restplus make_response, since graph_response
|
|
111
114
|
# is handling the content negotiation directly
|
|
@@ -7,6 +7,7 @@
|
|
|
7
7
|
xmlns:dct="http://purl.org/dc/terms/"
|
|
8
8
|
xmlns:ogc="http://www.opengis.net/ogc"
|
|
9
9
|
xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#"
|
|
10
|
+
xmlns:locn="http://www.w3.org/ns/locn#"
|
|
10
11
|
xmlns:dcterms="http://purl.org/dc/terms/"
|
|
11
12
|
xmlns:vcard="http://www.w3.org/2006/vcard/ns#"
|
|
12
13
|
xmlns:schema="http://schema.org/"
|
|
@@ -89,8 +90,16 @@
|
|
|
89
90
|
<dcterms:title>Dataset 2</dcterms:title>
|
|
90
91
|
<dct:spatial>
|
|
91
92
|
<ogc:Polygon>
|
|
93
|
+
<locn:geometry rdf:datatype="https://www.iana.org/assignments/media-types/application/vnd.geo+json"><![CDATA[{"type":"Polygon","coordinates":[[[-6,51],[10,51],[10,40],[-6,40],[-6,51]]]}]]></locn:geometry>
|
|
92
94
|
<geo:asWKT rdf:datatype="http://www.opengis.net/rdf#wktLiteral">
|
|
93
|
-
Polygon((
|
|
95
|
+
Polygon((159 -25, 159 -11, 212 -11, 212 -25, 159 -25))
|
|
96
|
+
</geo:asWKT>
|
|
97
|
+
<geo:asWKT rdf:datatype="http://www.opengis.net/rdf#wktLiteral">
|
|
98
|
+
Polygon((4 45, 4 46, 4 46, 4 45, 4 45))
|
|
99
|
+
</geo:asWKT>
|
|
100
|
+
<locn:geometry rdf:datatype="https://www.iana.org/assignments/media-types/application/vnd.geo+json"><![CDATA[{"type":"Polygon","coordinates":[[[4, 45], [4, 46], [4, 46], [4, 45], [4, 45]]]}]]></locn:geometry>
|
|
101
|
+
<geo:asWKT rdf:datatype="http://www.opengis.net/rdf#wktLiteral">
|
|
102
|
+
Polygon((159 -25, 159 -11, 212 -11, 212 -25, 159 -25))
|
|
94
103
|
</geo:asWKT>
|
|
95
104
|
</ogc:Polygon>
|
|
96
105
|
</dct:spatial>
|
|
@@ -268,7 +268,7 @@ class DcatBackendTest:
|
|
|
268
268
|
datasets = {d.harvest.dct_identifier: d for d in Dataset.objects}
|
|
269
269
|
|
|
270
270
|
assert datasets['1'].spatial == None
|
|
271
|
-
assert datasets['2'].spatial.geom == {'type': 'MultiPolygon', 'coordinates': [[[[4
|
|
271
|
+
assert datasets['2'].spatial.geom == {'type': 'MultiPolygon', 'coordinates': [[[[-6,51],[10,51],[10,40],[-6,40],[-6,51]]], [[[4, 45], [4, 46], [4, 46], [4, 45], [4, 45]]], [[[159, -25.], [159, -11], [212, -11], [212, -25.], [159, -25.]]]]}
|
|
272
272
|
assert datasets['3'].spatial == None
|
|
273
273
|
|
|
274
274
|
@pytest.mark.options(SCHEMA_CATALOG_URL='https://example.com/schemas')
|
udata/rdf.py
CHANGED
|
@@ -21,6 +21,7 @@ log = logging.getLogger(__name__)
|
|
|
21
21
|
# Extra Namespaces
|
|
22
22
|
ADMS = Namespace('http://www.w3.org/ns/adms#')
|
|
23
23
|
DCAT = Namespace('http://www.w3.org/ns/dcat#')
|
|
24
|
+
DCATAP = Namespace('http://data.europa.eu/r5r/')
|
|
24
25
|
HYDRA = Namespace('http://www.w3.org/ns/hydra/core#')
|
|
25
26
|
SCHEMA = Namespace('http://schema.org/')
|
|
26
27
|
SCV = Namespace('http://purl.org/NET/scovo#')
|
|
@@ -35,6 +36,7 @@ VCARD = Namespace('http://www.w3.org/2006/vcard/ns#')
|
|
|
35
36
|
|
|
36
37
|
namespace_manager = NamespaceManager(Graph())
|
|
37
38
|
namespace_manager.bind('dcat', DCAT)
|
|
39
|
+
namespace_manager.bind('dcatap', DCATAP)
|
|
38
40
|
namespace_manager.bind('dct', DCT)
|
|
39
41
|
namespace_manager.bind('foaf', FOAF)
|
|
40
42
|
namespace_manager.bind('foaf', FOAF)
|
|
@@ -98,6 +100,8 @@ RDF_EXTENSIONS = {
|
|
|
98
100
|
# Includes control characters, unicode surrogate characters and unicode end-of-plane non-characters
|
|
99
101
|
ILLEGAL_XML_CHARS = '[\x00-\x08\x0b\x0c\x0e-\x1F\uD800-\uDFFF\uFFFE\uFFFF]'
|
|
100
102
|
|
|
103
|
+
HVD_LEGISLATION = 'http://data.europa.eu/eli/reg_impl/2023/138/oj'
|
|
104
|
+
|
|
101
105
|
|
|
102
106
|
def guess_format(string):
|
|
103
107
|
'''Guess format given an extension or a mime-type'''
|