udata 7.0.5.dev27838__py2.py3-none-any.whl → 7.0.5.dev27942__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of udata might be problematic.

Files changed (28)
  1. udata/core/dataset/rdf.py +45 -1
  2. udata/core/organization/csv.py +27 -1
  3. udata/frontend/csv.py +8 -8
  4. udata/harvest/tests/dcat/bnodes.xml +16 -2
  5. udata/harvest/tests/test_dcat_backend.py +18 -0
  6. udata/static/chunks/{11.a23c110811a9ac943478.js → 11.c0ccea08914b6b41568e.js} +3 -3
  7. udata/static/chunks/{11.a23c110811a9ac943478.js.map → 11.c0ccea08914b6b41568e.js.map} +1 -1
  8. udata/static/chunks/{13.0889e093f8664e38568c.js → 13.526a25163ababaa44409.js} +2 -2
  9. udata/static/chunks/{13.0889e093f8664e38568c.js.map → 13.526a25163ababaa44409.js.map} +1 -1
  10. udata/static/chunks/{16.f41599478d3e97ad9a30.js → 16.7901839b4227881947f6.js} +2 -2
  11. udata/static/chunks/{16.f41599478d3e97ad9a30.js.map → 16.7901839b4227881947f6.js.map} +1 -1
  12. udata/static/chunks/{19.2b534a26af8b17e9170b.js → 19.471d5a2a08eef6e5338a.js} +3 -3
  13. udata/static/chunks/{19.2b534a26af8b17e9170b.js.map → 19.471d5a2a08eef6e5338a.js.map} +1 -1
  14. udata/static/chunks/{5.7115454a1183e5c12eef.js → 5.534e0531d0e2b150146f.js} +3 -3
  15. udata/static/chunks/{5.7115454a1183e5c12eef.js.map → 5.534e0531d0e2b150146f.js.map} +1 -1
  16. udata/static/chunks/{6.16bb24fb8240f2746488.js → 6.e56975229e6065f68d2a.js} +3 -3
  17. udata/static/chunks/{6.16bb24fb8240f2746488.js.map → 6.e56975229e6065f68d2a.js.map} +1 -1
  18. udata/static/chunks/{9.3e752966ff14e47e11f2.js → 9.534426728626f11f4571.js} +2 -2
  19. udata/static/chunks/{9.3e752966ff14e47e11f2.js.map → 9.534426728626f11f4571.js.map} +1 -1
  20. udata/static/common.js +1 -1
  21. udata/static/common.js.map +1 -1
  22. udata/tests/organization/test_csv_adapter.py +43 -0
  23. {udata-7.0.5.dev27838.dist-info → udata-7.0.5.dev27942.dist-info}/METADATA +5 -1
  24. {udata-7.0.5.dev27838.dist-info → udata-7.0.5.dev27942.dist-info}/RECORD +28 -27
  25. {udata-7.0.5.dev27838.dist-info → udata-7.0.5.dev27942.dist-info}/LICENSE +0 -0
  26. {udata-7.0.5.dev27838.dist-info → udata-7.0.5.dev27942.dist-info}/WHEEL +0 -0
  27. {udata-7.0.5.dev27838.dist-info → udata-7.0.5.dev27942.dist-info}/entry_points.txt +0 -0
  28. {udata-7.0.5.dev27838.dist-info → udata-7.0.5.dev27942.dist-info}/top_level.txt +0 -0
udata/core/dataset/rdf.py CHANGED
@@ -2,17 +2,21 @@
 This module centralize dataset helpers for RDF/DCAT serialization and parsing
 '''
 import calendar
+import json
 import logging
 
 from datetime import date
 from html.parser import HTMLParser
 from dateutil.parser import parse as parse_dt
 from flask import current_app
+from geomet import wkt
 from rdflib import Graph, URIRef, Literal, BNode
 from rdflib.resource import Resource as RdfResource
 from rdflib.namespace import RDF
+from mongoengine.errors import ValidationError
 
 from udata import i18n, uris
+from udata.core.spatial.models import SpatialCoverage
 from udata.frontend.markdown import parse_html
 from udata.core.dataset.models import HarvestDatasetMetadata, HarvestResourceMetadata
 from udata.models import db, ContactPoint
@@ -334,6 +338,42 @@ def contact_point_from_rdf(rdf, dataset):
             ContactPoint(name=name, email=email, owner=dataset.owner).save())
 
 
+def spatial_from_rdf(term):
+    if term is None:
+        return None
+
+    for object in term.objects():
+        if isinstance(object, Literal):
+            if object.datatype.__str__() == 'https://www.iana.org/assignments/media-types/application/vnd.geo+json':
+                try:
+                    geojson = json.loads(object.toPython())
+                except ValueError as e:
+                    log.warning(f"Invalid JSON in spatial GeoJSON {object.toPython()} {e}")
+                    continue
+            elif object.datatype.__str__() == 'http://www.opengis.net/rdf#wktLiteral':
+                try:
+                    # .upper() si here because geomet doesn't support Polygon but only POLYGON
+                    geojson = wkt.loads(object.toPython().strip().upper())
+                except ValueError as e:
+                    log.warning(f"Invalid JSON in spatial WKT {object.toPython()} {e}")
+                    continue
+            else:
+                continue
+
+            if geojson['type'] == 'Polygon':
+                geojson['type'] = 'MultiPolygon'
+                geojson['coordinates'] = [geojson['coordinates']]
+
+            spatial_coverage = SpatialCoverage(geom=geojson)
+
+            try:
+                spatial_coverage.clean()
+                return spatial_coverage
+            except ValidationError:
+                return None
+
+    return None
+
 def frequency_from_rdf(term):
     if isinstance(term, str):
         try:
@@ -488,7 +528,7 @@ def resource_from_rdf(graph_or_distrib, dataset=None, is_additionnal=False):
     return resource
 
 
-def dataset_from_rdf(graph, dataset=None, node=None):
+def dataset_from_rdf(graph: Graph, dataset=None, node=None):
     '''
     Create or update a dataset from a RDF/DCAT graph
     '''
@@ -509,6 +549,10 @@ def dataset_from_rdf(graph, dataset=None, node=None):
     if schema:
         dataset.schema = schema
 
+    spatial_coverage = spatial_from_rdf(d.value(DCT.spatial))
+    if spatial_coverage:
+        dataset.spatial = spatial_coverage
+
     acronym = rdf_value(d, SKOS.altLabel)
     if acronym:
         dataset.acronym = acronym
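
For context on the new spatial_from_rdf helper: it relies on geomet's wkt.loads to turn a WKT literal into a GeoJSON mapping, then wraps a bare Polygon into the MultiPolygon shape expected by SpatialCoverage. A minimal standalone sketch of that conversion, assuming only the geomet package (the sample coordinates are taken from the bnodes.xml fixture further down):

# Sketch of the WKT -> GeoJSON conversion performed by spatial_from_rdf.
from geomet import wkt

literal = ('POLYGON((4.44641288 45.54214467, 4.44641288 46.01316963, '
           '4.75655252 46.01316963, 4.75655252 45.54214467, 4.44641288 45.54214467))')
# Upper-cased as in the harvester code, since geomet expects upper-case geometry names.
geojson = wkt.loads(literal.strip().upper())

# udata stores spatial coverage as MultiPolygon, so a bare Polygon gets wrapped.
if geojson['type'] == 'Polygon':
    geojson = {'type': 'MultiPolygon', 'coordinates': [geojson['coordinates']]}

print(geojson['type'])  # MultiPolygon
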
udata/core/organization/csv.py CHANGED
@@ -1,3 +1,4 @@
+from udata.core.dataset.models import Dataset
 from udata.frontend import csv
 
 from .models import Organization
@@ -5,6 +6,8 @@ from .models import Organization
 
 @csv.adapter(Organization)
 class OrganizationCsvAdapter(csv.Adapter):
+    downloads_counts = None
+
     fields = (
         'id',
         'name',
@@ -18,4 +21,27 @@ class OrganizationCsvAdapter(csv.Adapter):
     )
 
     def dynamic_fields(self):
-        return csv.metric_fields(Organization)
+        return csv.metric_fields(Organization) + self.get_dynamic_field_downloads()
+
+    def get_dynamic_field_downloads(self):
+        downloads_counts = self.get_downloads_counts()
+        return [('downloads', lambda o: downloads_counts.get(str(o.id), 0))]
+
+    def get_downloads_counts(self):
+        '''
+        Prefetch all the resources' downloads for all selected organization into memory
+        '''
+        if self.downloads_counts is not None:
+            return self.downloads_counts
+
+        self.downloads_counts = {}
+
+        ids = [o.id for o in self.queryset]
+        for dataset in Dataset.objects(organization__in=ids):
+            org_id = str(dataset.organization.id)
+            if self.downloads_counts.get(org_id) is None:
+                self.downloads_counts[org_id] = 0
+
+            self.downloads_counts[org_id] += sum(resource.metrics.get('views', 0) for resource in dataset.resources)
+
+        return self.downloads_counts
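
The prefetch above trades one query per CSV row for a single Dataset query: per-resource metrics are summed once into a dict keyed by organization id, and the new 'downloads' column reads from that dict. A standalone sketch of the same aggregation pattern, using hypothetical in-memory data in place of the Dataset queryset:

# Hypothetical stand-in data; in the adapter the loop runs over
# Dataset.objects(organization__in=ids).
datasets = [
    {'org_id': 'org-a', 'resources': [{'metrics': {'views': 3}}, {'metrics': {}}]},
    {'org_id': 'org-a', 'resources': [{'metrics': {'views': 2}}]},
    {'org_id': 'org-b', 'resources': [{'metrics': {'views': 7}}]},
]

downloads_counts = {}
for dataset in datasets:
    org_id = dataset['org_id']
    downloads_counts.setdefault(org_id, 0)
    # Sum the per-resource counters, defaulting to 0 when the metric is missing.
    downloads_counts[org_id] += sum(r['metrics'].get('views', 0) for r in dataset['resources'])

print(downloads_counts)  # {'org-a': 5, 'org-b': 7}
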
udata/frontend/csv.py CHANGED
@@ -58,8 +58,8 @@ class Adapter(object):
                 else:
                     field_tuple = (name, self.getter(*field))
             except Exception as e:  # Catch all errors intentionally.
-                log.error('Error exporting CSV for {name}: {error}'.format(
-                    name=self.__class__.__name__, error=e))
+                log.error('Error exporting CSV for {name}: {error_class} {error}'.format(
+                    name=self.__class__.__name__, error_class=e.__class__.__name__, error=e))
             self._fields.append(field_tuple)
         return self._fields
 
@@ -89,8 +89,8 @@ class Adapter(object):
            try:
                content = safestr(getter(obj))
            except Exception as e:  # Catch all errors intentionally.
-                log.error('Error exporting CSV for {name}: {error}'.format(
-                    name=self.__class__.__name__, error=e))
+                log.error('Error exporting CSV for {name}: {error_class} {error}'.format(
+                    name=self.__class__.__name__, error_class=e.__class__.__name__, error=e))
            row.append(content)
        return row
 
@@ -130,8 +130,8 @@ class NestedAdapter(Adapter):
                 else:
                     field_tuple = (name, self.getter(*field))
             except Exception as e:  # Catch all errors intentionally.
-                log.error('Error exporting CSV for {name}: {error}'.format(
-                    name=self.__class__.__name__, error=e))
+                log.error('Error exporting CSV for {name}: {error_class} {error}'.format(
+                    name=self.__class__.__name__, error_class=e.__class__.__name__, error=e))
             self._nested_fields.append(field_tuple)
         return self._nested_fields
 
@@ -155,8 +155,8 @@ class NestedAdapter(Adapter):
            try:
                content = safestr(getter(nested))
            except Exception as e:  # Catch all errors intentionally.
-                log.error('Error exporting CSV for {name}: {error}'.format(
-                    name=self.__class__.__name__, error=e))
+                log.error('Error exporting CSV for {name}: {error_class} {error}'.format(
+                    name=self.__class__.__name__, error_class=e.__class__.__name__, error=e))
            row.append(content)
        return row
 
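The four hunks above make the same change: the caught exception's class name is now logged next to its message, which makes otherwise terse errors easier to diagnose. A small illustration of the difference, using a hypothetical KeyError raised by a field getter:

import logging

log = logging.getLogger(__name__)
e = KeyError('downloads')  # hypothetical error raised by a getter

# Old format: "Error exporting CSV for OrganizationCsvAdapter: 'downloads'"
# New format: "Error exporting CSV for OrganizationCsvAdapter: KeyError 'downloads'"
log.error('Error exporting CSV for {name}: {error_class} {error}'.format(
    name='OrganizationCsvAdapter', error_class=e.__class__.__name__, error=e))
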
udata/harvest/tests/dcat/bnodes.xml CHANGED
@@ -5,6 +5,8 @@
          xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
          xmlns:dcat="http://www.w3.org/ns/dcat#"
          xmlns:dct="http://purl.org/dc/terms/"
+         xmlns:ogc="http://www.opengis.net/ogc"
+         xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#"
          xmlns:dcterms="http://purl.org/dc/terms/"
          xmlns:vcard="http://www.w3.org/2006/vcard/ns#"
          xmlns:schema="http://schema.org/"
@@ -54,7 +56,13 @@
     <owl:versionInfo>1.0</owl:versionInfo>
     <dcat:distribution rdf:resource="http://data.test.org/datasets/1/resources/2"/>
     <dcat:keyword>Tag 4</dcat:keyword>
-    <dcterms:spatial rdf:resource="http://wuEurope.com/"/>
+    <dct:spatial>
+      <ogc:Polygon>
+        <geo:asWKT rdf:datatype="http://www.opengis.net/rdf#wktLiteral">
+          wrong wkt
+        </geo:asWKT>
+      </ogc:Polygon>
+    </dct:spatial>
     <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2016-12-14T19:01:24.184120</dcterms:modified>
     <dcat:keyword>Tag 2</dcat:keyword>
     <dcat:keyword>Tag 1</dcat:keyword>
@@ -79,7 +87,13 @@
     <dcat:keyword>Tag 3</dcat:keyword>
     <dcat:distribution rdf:resource="http://data.test.org/datasets/2/resources/2"/>
     <dcterms:title>Dataset 2</dcterms:title>
-    <dcterms:spatial rdf:resource="http://wuEurope.com/"/>
+    <dct:spatial>
+      <ogc:Polygon>
+        <geo:asWKT rdf:datatype="http://www.opengis.net/rdf#wktLiteral">
+          Polygon((4.44641288 45.54214467, 4.44641288 46.01316963, 4.75655252 46.01316963, 4.75655252 45.54214467, 4.44641288 45.54214467))
+        </geo:asWKT>
+      </ogc:Polygon>
+    </dct:spatial>
     <dcterms:identifier>2</dcterms:identifier>
     <dct:conformsTo rdf:resource="https://www.ecologie.gouv.fr/sites/default/files/R%C3%A9glementation%20IRVE.pdf" />
   </dcat:Dataset>
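
For reference, rdflib parses the new dct:spatial node into a blank node whose geo:asWKT value is a Literal typed http://www.opengis.net/rdf#wktLiteral, which is exactly the shape spatial_from_rdf inspects. A minimal parsing sketch, assuming only rdflib:

from rdflib import Graph, Literal

data = '''<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
                   xmlns:ogc="http://www.opengis.net/ogc"
                   xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#">
  <ogc:Polygon>
    <geo:asWKT rdf:datatype="http://www.opengis.net/rdf#wktLiteral">wrong wkt</geo:asWKT>
  </ogc:Polygon>
</rdf:RDF>'''

g = Graph().parse(data=data, format='xml')
for obj in g.objects():
    if isinstance(obj, Literal):
        print(obj.datatype)    # http://www.opengis.net/rdf#wktLiteral
        print(obj.toPython())  # 'wrong wkt' -> rejected by spatial_from_rdf with a warning
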
udata/harvest/tests/test_dcat_backend.py CHANGED
@@ -156,6 +156,24 @@ class DcatBackendTest:
         assert len(datasets['1'].resources) == 2
         assert len(datasets['2'].resources) == 2
 
+
+    @pytest.mark.options(SCHEMA_CATALOG_URL='https://example.com/schemas')
+    def test_harvest_spatial(self, rmock):
+        rmock.get('https://example.com/schemas', json=ResourceSchemaMockData.get_mock_data())
+
+        filename = 'bnodes.xml'
+        url = mock_dcat(rmock, filename)
+        org = OrganizationFactory()
+        source = HarvestSourceFactory(backend='dcat', url=url, organization=org)
+
+        actions.run(source.slug)
+
+        datasets = {d.harvest.dct_identifier: d for d in Dataset.objects}
+
+        assert datasets['1'].spatial == None
+        assert datasets['2'].spatial.geom == {'type': 'MultiPolygon', 'coordinates': [[[[4.44641288, 45.54214467], [4.44641288, 46.01316963], [4.75655252, 46.01316963], [4.75655252, 45.54214467], [4.44641288, 45.54214467]]]]}
+        assert datasets['3'].spatial == None
+
 
     @pytest.mark.options(SCHEMA_CATALOG_URL='https://example.com/schemas')
     def test_harvest_schemas(self, rmock):
         rmock.get('https://example.com/schemas', json=ResourceSchemaMockData.get_mock_data())