udata 8.0.2.dev29284__py2.py3-none-any.whl → 8.0.2.dev29339__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of udata might be problematic. Click here for more details.

Files changed (29)
  1. udata/core/dataset/rdf.py +56 -14
  2. udata/core/site/api.py +4 -1
  3. udata/harvest/tests/dcat/bnodes.xml +10 -1
  4. udata/harvest/tests/test_dcat_backend.py +1 -1
  5. udata/rdf.py +4 -0
  6. udata/static/chunks/{11.7266fef2dddc1db403d9.js → 11.ae54612e36c6d46f85db.js} +3 -3
  7. udata/static/chunks/{11.7266fef2dddc1db403d9.js.map → 11.ae54612e36c6d46f85db.js.map} +1 -1
  8. udata/static/chunks/{13.91b177d7d531fd55cf5d.js → 13.d8ccb992a49875966313.js} +2 -2
  9. udata/static/chunks/{13.91b177d7d531fd55cf5d.js.map → 13.d8ccb992a49875966313.js.map} +1 -1
  10. udata/static/chunks/{16.e866757bab9f6b0a3f1b.js → 16.4565605e68bab129a471.js} +2 -2
  11. udata/static/chunks/{16.e866757bab9f6b0a3f1b.js.map → 16.4565605e68bab129a471.js.map} +1 -1
  12. udata/static/chunks/{19.619b83ac597516dcd03e.js → 19.f993a75d5bfe2382548d.js} +3 -3
  13. udata/static/chunks/{19.619b83ac597516dcd03e.js.map → 19.f993a75d5bfe2382548d.js.map} +1 -1
  14. udata/static/chunks/{5.48417db6b33328fa9d6a.js → 5.cc2e7bf65ef32f9c8604.js} +3 -3
  15. udata/static/chunks/{5.48417db6b33328fa9d6a.js.map → 5.cc2e7bf65ef32f9c8604.js.map} +1 -1
  16. udata/static/chunks/{6.f84539bd4c419b36cc19.js → 6.cad898a38692eda28965.js} +3 -3
  17. udata/static/chunks/{6.f84539bd4c419b36cc19.js.map → 6.cad898a38692eda28965.js.map} +1 -1
  18. udata/static/chunks/{9.07503e7f7ec02919f696.js → 9.d5b992e9ef51921aeb57.js} +2 -2
  19. udata/static/chunks/{9.07503e7f7ec02919f696.js.map → 9.d5b992e9ef51921aeb57.js.map} +1 -1
  20. udata/static/common.js +1 -1
  21. udata/static/common.js.map +1 -1
  22. udata/tests/dataset/test_dataset_rdf.py +17 -2
  23. udata/tests/site/test_site_rdf.py +16 -0
  24. {udata-8.0.2.dev29284.dist-info → udata-8.0.2.dev29339.dist-info}/METADATA +3 -1
  25. {udata-8.0.2.dev29284.dist-info → udata-8.0.2.dev29339.dist-info}/RECORD +29 -29
  26. {udata-8.0.2.dev29284.dist-info → udata-8.0.2.dev29339.dist-info}/LICENSE +0 -0
  27. {udata-8.0.2.dev29284.dist-info → udata-8.0.2.dev29339.dist-info}/WHEEL +0 -0
  28. {udata-8.0.2.dev29284.dist-info → udata-8.0.2.dev29339.dist-info}/entry_points.txt +0 -0
  29. {udata-8.0.2.dev29284.dist-info → udata-8.0.2.dev29339.dist-info}/top_level.txt +0 -0
udata/core/dataset/rdf.py CHANGED
@@ -22,9 +22,10 @@ from udata.frontend.markdown import parse_html
22
22
  from udata.core.dataset.models import HarvestDatasetMetadata, HarvestResourceMetadata
23
23
  from udata.models import db, ContactPoint
24
24
  from udata.rdf import (
25
- DCAT, DCT, FREQ, SCV, SKOS, SPDX, SCHEMA, EUFREQ, EUFORMAT, IANAFORMAT, VCARD, RDFS,
26
- namespace_manager, schema_from_rdf, url_from_rdf
25
+ DCAT, DCATAP, DCT, FREQ, SCV, SKOS, SPDX, SCHEMA, EUFREQ, EUFORMAT, IANAFORMAT, VCARD, RDFS,
26
+ HVD_LEGISLATION, namespace_manager, schema_from_rdf, url_from_rdf
27
27
  )
28
+ from udata.tags import slug as slugify_tag
28
29
  from udata.utils import get_by, safe_unicode
29
30
  from udata.uris import endpoint_for
30
31
 
@@ -85,6 +86,7 @@ EU_HVD_CATEGORIES = {
85
86
  "http://data.europa.eu/bna/c_dd313021": "Observation de la terre et environnement",
86
87
  "http://data.europa.eu/bna/c_e1da4e07": "Statistiques"
87
88
  }
89
+ TAG_TO_EU_HVD_CATEGORIES = {slugify_tag(EU_HVD_CATEGORIES[uri]): uri for uri in EU_HVD_CATEGORIES}
88
90
 
89
91
 
90
92
  class HTMLDetector(HTMLParser):
@@ -141,7 +143,7 @@ def owner_to_rdf(dataset, graph=None):
141
143
  return
142
144
 
143
145
 
144
- def resource_to_rdf(resource, dataset=None, graph=None):
146
+ def resource_to_rdf(resource, dataset=None, graph=None, is_hvd=False):
145
147
  '''
146
148
  Map a Resource domain model to a DCAT/RDF graph
147
149
  '''
@@ -180,6 +182,9 @@ def resource_to_rdf(resource, dataset=None, graph=None):
180
182
  checksum.add(SPDX.algorithm, getattr(SPDX, algorithm))
181
183
  checksum.add(SPDX.checksumValue, Literal(resource.checksum.value))
182
184
  r.add(SPDX.checksum, checksum)
185
+ if is_hvd:
186
+ # DCAT-AP HVD applicable legislation is also expected at the distribution level
187
+ r.add(DCATAP.applicableLegislation, URIRef(HVD_LEGISLATION))
183
188
  return r
184
189
 
185
190
 
@@ -214,11 +219,20 @@ def dataset_to_rdf(dataset, graph=None):
214
219
  if dataset.acronym:
215
220
  d.set(SKOS.altLabel, Literal(dataset.acronym))
216
221
 
222
+ # Add DCAT-AP HVD properties if the dataset is tagged hvd.
223
+ # See https://semiceu.github.io/DCAT-AP/releases/2.2.0-hvd/
224
+ is_hvd = current_app.config['HVD_SUPPORT'] and 'hvd' in dataset.tags
225
+ if is_hvd:
226
+ d.add(DCATAP.applicableLegislation, URIRef(HVD_LEGISLATION))
227
+
217
228
  for tag in dataset.tags:
218
229
  d.add(DCAT.keyword, Literal(tag))
230
+ # Add HVD category if this dataset is tagged HVD
231
+ if is_hvd and tag in TAG_TO_EU_HVD_CATEGORIES:
232
+ d.add(DCATAP.hvdCategory, URIRef(TAG_TO_EU_HVD_CATEGORIES[tag]))
219
233
 
220
234
  for resource in dataset.resources:
221
- d.add(DCAT.distribution, resource_to_rdf(resource, dataset, graph))
235
+ d.add(DCAT.distribution, resource_to_rdf(resource, dataset, graph, is_hvd))
222
236
 
223
237
  if dataset.temporal_coverage:
224
238
  d.set(DCT.temporal, temporal_to_rdf(dataset.temporal_coverage, graph))
@@ -381,23 +395,51 @@ def spatial_from_rdf(graph):
381
395
  else:
382
396
  continue
383
397
 
384
- if geojson['type'] == 'Polygon':
385
- geojson['type'] = 'MultiPolygon'
386
- geojson['coordinates'] = [geojson['coordinates']]
387
-
388
398
  geojsons.append(geojson)
389
399
  except Exception as e:
390
400
  log.exception(f"Exception during `spatial_from_rdf` for term {term}: {e}", stack_info=True)
391
401
 
402
+ if not geojsons:
403
+ return None
404
+
405
+ # We first try to build a big MultiPolygon with all the spatial coverages found in RDF.
406
+ # We deduplicate the coordinates because some backend provides the same coordinates multiple
407
+ # times in different format. We only support in this first pass Polygons and MultiPolygons. Not sure
408
+ # if there are other types of spatial coverage worth integrating (points? line strings?). But these other
409
+ # formats cannot be merged into the single representation stored in MongoDB; we'll deal with them in a second pass.
410
+ # The merge loses the properties and other information inside the GeoJSON…
411
+ # Note that having multiple `Polygon`s is not really the DCAT way of doing things; the standard requires that you use
412
+ # a `MultiPolygon` in this case. We support this for now, and will wait and see if it causes problems in the future for
413
+ # people following the standard. (see https://github.com/datagouv/data.gouv.fr/issues/1362#issuecomment-2112774115)
414
+ polygons = []
392
415
  for geojson in geojsons:
393
- spatial_coverage = SpatialCoverage(geom=geojson)
394
- try:
395
- spatial_coverage.clean()
396
- return spatial_coverage
397
- except ValidationError:
416
+ if geojson['type'] == 'Polygon':
417
+ if geojson['coordinates'] not in polygons:
418
+ polygons.append(geojson['coordinates'])
419
+ elif geojson['type'] == 'MultiPolygon':
420
+ for coordinates in geojson['coordinates']:
421
+ if coordinates not in polygons:
422
+ polygons.append(coordinates)
423
+ else:
424
+ log.warning(f"Unsupported GeoJSON type '{geojson['type']}'")
398
425
  continue
399
426
 
400
- return None
427
+ if not polygons:
428
+ log.warning(f"No supported types found in the GeoJSON data.")
429
+ return None
430
+
431
+ spatial_coverage = SpatialCoverage(geom={
432
+ 'type': 'MultiPolygon',
433
+ 'coordinates': polygons,
434
+ })
435
+
436
+ try:
437
+ spatial_coverage.clean()
438
+ return spatial_coverage
439
+ except ValidationError as e:
440
+ log.warning(f"Cannot save the spatial coverage {coordinates} (error was {e})")
441
+ return None
442
+
401
443
 
402
444
  def frequency_from_rdf(term):
403
445
  if isinstance(term, str):
udata/core/site/api.py CHANGED
@@ -105,7 +105,10 @@ class SiteRdfCatalogFormat(API):
105
105
  params = multi_to_dict(request.args)
106
106
  page = int(params.get('page', 1))
107
107
  page_size = int(params.get('page_size', 100))
108
- datasets = Dataset.objects.visible().paginate(page, page_size)
108
+ datasets = Dataset.objects.visible()
109
+ if 'tag' in params:
110
+ datasets = datasets.filter(tags=params.get('tag', ''))
111
+ datasets = datasets.paginate(page, page_size)
109
112
  catalog = build_catalog(current_site, datasets, format=format)
110
113
  # bypass flask-restplus make_response, since graph_response
111
114
  # is handling the content negotiation directly
@@ -7,6 +7,7 @@
7
7
  xmlns:dct="http://purl.org/dc/terms/"
8
8
  xmlns:ogc="http://www.opengis.net/ogc"
9
9
  xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#"
10
+ xmlns:locn="http://www.w3.org/ns/locn#"
10
11
  xmlns:dcterms="http://purl.org/dc/terms/"
11
12
  xmlns:vcard="http://www.w3.org/2006/vcard/ns#"
12
13
  xmlns:schema="http://schema.org/"
@@ -89,8 +90,16 @@
89
90
  <dcterms:title>Dataset 2</dcterms:title>
90
91
  <dct:spatial>
91
92
  <ogc:Polygon>
93
+ <locn:geometry rdf:datatype="https://www.iana.org/assignments/media-types/application/vnd.geo+json"><![CDATA[{"type":"Polygon","coordinates":[[[-6,51],[10,51],[10,40],[-6,40],[-6,51]]]}]]></locn:geometry>
92
94
  <geo:asWKT rdf:datatype="http://www.opengis.net/rdf#wktLiteral">
93
- Polygon((4.44641288 45.54214467, 4.44641288 46.01316963, 4.75655252 46.01316963, 4.75655252 45.54214467, 4.44641288 45.54214467))
95
+ Polygon((159 -25, 159 -11, 212 -11, 212 -25, 159 -25))
96
+ </geo:asWKT>
97
+ <geo:asWKT rdf:datatype="http://www.opengis.net/rdf#wktLiteral">
98
+ Polygon((4 45, 4 46, 4 46, 4 45, 4 45))
99
+ </geo:asWKT>
100
+ <locn:geometry rdf:datatype="https://www.iana.org/assignments/media-types/application/vnd.geo+json"><![CDATA[{"type":"Polygon","coordinates":[[[4, 45], [4, 46], [4, 46], [4, 45], [4, 45]]]}]]></locn:geometry>
101
+ <geo:asWKT rdf:datatype="http://www.opengis.net/rdf#wktLiteral">
102
+ Polygon((159 -25, 159 -11, 212 -11, 212 -25, 159 -25))
94
103
  </geo:asWKT>
95
104
  </ogc:Polygon>
96
105
  </dct:spatial>
@@ -268,7 +268,7 @@ class DcatBackendTest:
268
268
  datasets = {d.harvest.dct_identifier: d for d in Dataset.objects}
269
269
 
270
270
  assert datasets['1'].spatial == None
271
- assert datasets['2'].spatial.geom == {'type': 'MultiPolygon', 'coordinates': [[[[4.44641288, 45.54214467], [4.44641288, 46.01316963], [4.75655252, 46.01316963], [4.75655252, 45.54214467], [4.44641288, 45.54214467]]]]}
271
+ assert datasets['2'].spatial.geom == {'type': 'MultiPolygon', 'coordinates': [[[[-6,51],[10,51],[10,40],[-6,40],[-6,51]]], [[[4, 45], [4, 46], [4, 46], [4, 45], [4, 45]]], [[[159, -25.], [159, -11], [212, -11], [212, -25.], [159, -25.]]]]}
272
272
  assert datasets['3'].spatial == None
273
273
 
274
274
  @pytest.mark.options(SCHEMA_CATALOG_URL='https://example.com/schemas')
udata/rdf.py CHANGED
@@ -21,6 +21,7 @@ log = logging.getLogger(__name__)
21
21
  # Extra Namespaces
22
22
  ADMS = Namespace('http://www.w3.org/ns/adms#')
23
23
  DCAT = Namespace('http://www.w3.org/ns/dcat#')
24
+ DCATAP = Namespace('http://data.europa.eu/r5r/')
24
25
  HYDRA = Namespace('http://www.w3.org/ns/hydra/core#')
25
26
  SCHEMA = Namespace('http://schema.org/')
26
27
  SCV = Namespace('http://purl.org/NET/scovo#')
@@ -35,6 +36,7 @@ VCARD = Namespace('http://www.w3.org/2006/vcard/ns#')
35
36
 
36
37
  namespace_manager = NamespaceManager(Graph())
37
38
  namespace_manager.bind('dcat', DCAT)
39
+ namespace_manager.bind('dcatap', DCATAP)
38
40
  namespace_manager.bind('dct', DCT)
39
41
  namespace_manager.bind('foaf', FOAF)
40
42
  namespace_manager.bind('foaf', FOAF)
@@ -98,6 +100,8 @@ RDF_EXTENSIONS = {
98
100
  # Includes control characters, unicode surrogate characters and unicode end-of-plane non-characters
99
101
  ILLEGAL_XML_CHARS = '[\x00-\x08\x0b\x0c\x0e-\x1F\uD800-\uDFFF\uFFFE\uFFFF]'
100
102
 
103
+ HVD_LEGISLATION = 'http://data.europa.eu/eli/reg_impl/2023/138/oj'
104
+
101
105
 
102
106
  def guess_format(string):
103
107
  '''Guess format given an extension or a mime-type'''