udata 7.0.6__py2.py3-none-any.whl → 7.0.6.dev28180__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of udata might be problematic.

Files changed (60)
  1. udata/__init__.py +1 -1
  2. udata/commands/__init__.py +3 -0
  3. udata/commands/dcat.py +4 -7
  4. udata/core/activity/api.py +1 -1
  5. udata/core/activity/models.py +3 -4
  6. udata/core/activity/tasks.py +5 -6
  7. udata/core/dataset/factories.py +4 -2
  8. udata/core/dataset/models.py +5 -2
  9. udata/core/dataset/rdf.py +25 -65
  10. udata/core/dataset/search.py +1 -0
  11. udata/core/spatial/tests/test_api.py +10 -7
  12. udata/core/topic/factories.py +2 -2
  13. udata/harvest/backends/dcat.py +24 -128
  14. udata/harvest/tests/dcat/bnodes.xml +1 -7
  15. udata/harvest/tests/test_dcat_backend.py +9 -82
  16. udata/rdf.py +1 -22
  17. udata/search/__init__.py +2 -2
  18. udata/static/chunks/{11.a23c110811a9ac943478.js → 11.c0ccea08914b6b41568e.js} +3 -3
  19. udata/static/chunks/{11.a23c110811a9ac943478.js.map → 11.c0ccea08914b6b41568e.js.map} +1 -1
  20. udata/static/chunks/{13.0889e093f8664e38568c.js → 13.526a25163ababaa44409.js} +2 -2
  21. udata/static/chunks/{13.0889e093f8664e38568c.js.map → 13.526a25163ababaa44409.js.map} +1 -1
  22. udata/static/chunks/{16.f41599478d3e97ad9a30.js → 16.7901839b4227881947f6.js} +2 -2
  23. udata/static/chunks/{16.f41599478d3e97ad9a30.js.map → 16.7901839b4227881947f6.js.map} +1 -1
  24. udata/static/chunks/{19.2b534a26af8b17e9170b.js → 19.471d5a2a08eef6e5338a.js} +3 -3
  25. udata/static/chunks/{19.2b534a26af8b17e9170b.js.map → 19.471d5a2a08eef6e5338a.js.map} +1 -1
  26. udata/static/chunks/{5.7115454a1183e5c12eef.js → 5.534e0531d0e2b150146f.js} +3 -3
  27. udata/static/chunks/{5.7115454a1183e5c12eef.js.map → 5.534e0531d0e2b150146f.js.map} +1 -1
  28. udata/static/chunks/{6.16bb24fb8240f2746488.js → 6.e56975229e6065f68d2a.js} +3 -3
  29. udata/static/chunks/{6.16bb24fb8240f2746488.js.map → 6.e56975229e6065f68d2a.js.map} +1 -1
  30. udata/static/chunks/{9.3e752966ff14e47e11f2.js → 9.534426728626f11f4571.js} +2 -2
  31. udata/static/chunks/{9.3e752966ff14e47e11f2.js.map → 9.534426728626f11f4571.js.map} +1 -1
  32. udata/static/common.js +1 -1
  33. udata/static/common.js.map +1 -1
  34. udata/tests/api/test_datasets_api.py +45 -45
  35. udata/tests/api/test_me_api.py +14 -13
  36. udata/tests/dataset/test_dataset_actions.py +2 -2
  37. udata/tests/dataset/test_dataset_commands.py +3 -3
  38. udata/tests/dataset/test_dataset_model.py +1 -2
  39. udata/tests/organization/test_organization_model.py +3 -3
  40. udata/tests/organization/test_organization_rdf.py +3 -3
  41. udata/tests/reuse/test_reuse_model.py +2 -2
  42. udata/tests/search/test_adapter.py +12 -12
  43. udata/tests/search/test_results.py +4 -4
  44. udata/tests/site/test_site_api.py +3 -3
  45. udata/tests/site/test_site_metrics.py +3 -3
  46. udata/tests/site/test_site_rdf.py +6 -6
  47. udata/tests/test_activity.py +0 -12
  48. udata/tests/test_transfer.py +17 -18
  49. {udata-7.0.6.dist-info → udata-7.0.6.dev28180.dist-info}/METADATA +4 -10
  50. {udata-7.0.6.dist-info → udata-7.0.6.dev28180.dist-info}/RECORD +54 -60
  51. {udata-7.0.6.dist-info → udata-7.0.6.dev28180.dist-info}/entry_points.txt +0 -1
  52. udata/harvest/tests/csw_dcat/XSLT.xml +0 -4298
  53. udata/harvest/tests/csw_dcat/geonetwork-iso-page-1.xml +0 -1291
  54. udata/harvest/tests/csw_dcat/geonetwork-iso-page-3.xml +0 -1139
  55. udata/harvest/tests/csw_dcat/geonetwork-iso-page-5.xml +0 -1266
  56. udata/harvest/tests/dcat/evian.json +0 -464
  57. udata/migrations/2024-03-22-migrate-activity-kwargs-to-extras.py +0 -16
  58. {udata-7.0.6.dist-info → udata-7.0.6.dev28180.dist-info}/LICENSE +0 -0
  59. {udata-7.0.6.dist-info → udata-7.0.6.dev28180.dist-info}/WHEEL +0 -0
  60. {udata-7.0.6.dist-info → udata-7.0.6.dev28180.dist-info}/top_level.txt +0 -0
udata/harvest/tests/test_dcat_backend.py CHANGED
@@ -1,22 +1,21 @@
- from datetime import date
  import logging
  import os
- import re
+ from flask import current_app

  import pytest

+ from datetime import date
  import boto3
- from flask import current_app
  import xml.etree.ElementTree as ET
-
  from udata.harvest.models import HarvestJob
+
  from udata.models import Dataset
  from udata.core.organization.factories import OrganizationFactory
  from udata.core.dataset.factories import LicenseFactory, ResourceSchemaMockData
  from udata.storage.s3 import get_from_json

  from .factories import HarvestSourceFactory
- from ..backends.dcat import URIS_TO_REPLACE, CswIso19139DcatBackend
+ from ..backends.dcat import URIS_TO_REPLACE
  from .. import actions

  log = logging.getLogger(__name__)
@@ -161,21 +160,6 @@ class DcatBackendTest:
          assert len(datasets['1'].resources) == 2
          assert len(datasets['2'].resources) == 2

-     def test_harvest_literal_spatial(self, rmock):
-         url = mock_dcat(rmock, 'evian.json')
-         org = OrganizationFactory()
-         source = HarvestSourceFactory(backend='dcat',
-                                       url=url,
-                                       organization=org)
-
-         actions.run(source.slug)
-
-         datasets = {d.harvest.dct_identifier: d for d in Dataset.objects}
-         assert len(datasets) == 8
-         assert datasets['https://www.arcgis.com/home/item.html?id=f6565516d1354383b25793e630cf3f2b&sublayer=5'].spatial is not None
-         assert datasets['https://www.arcgis.com/home/item.html?id=f6565516d1354383b25793e630cf3f2b&sublayer=5'].spatial.geom == {'type': 'MultiPolygon', 'coordinates': [[[[6.5735, 46.3912], [6.6069, 46.3912], [6.6069, 46.4028], [6.5735, 46.4028], [6.5735, 46.3912]]]]}
-
-
      @pytest.mark.skip(reason="Mocking S3 requires `moto` which is not available for our current Python 3.7. We can manually test it.")
      @pytest.mark.options(SCHEMA_CATALOG_URL='https://example.com/schemas', HARVEST_JOBS_RETENTION_DAYS=0)
      # @mock_s3
@@ -211,8 +195,8 @@ class DcatBackendTest:
          assert resources_by_title['Resource 1-2'].schema.name == None
          assert resources_by_title['Resource 1-2'].schema.version == None

-         assert datasets['2'].schema.name == 'RGF93 / Lambert-93 (EPSG:2154)'
-         assert datasets['2'].schema.url == 'http://inspire.ec.europa.eu/glossary/SpatialReferenceSystem'
+         assert datasets['2'].schema.name == None
+         assert datasets['2'].schema.url == 'https://www.ecologie.gouv.fr/sites/default/files/R%C3%A9glementation%20IRVE.pdf'
          resources_by_title = { resource['title']: resource for resource in datasets['2'].resources }

          # Unknown schema are kept as they were provided
@@ -281,8 +265,8 @@ class DcatBackendTest:
          assert resources_by_title['Resource 1-2'].schema.name == None
          assert resources_by_title['Resource 1-2'].schema.version == None

-         assert datasets['2'].schema.name == 'RGF93 / Lambert-93 (EPSG:2154)'
-         assert datasets['2'].schema.url == 'http://inspire.ec.europa.eu/glossary/SpatialReferenceSystem'
+         assert datasets['2'].schema.name == None
+         assert datasets['2'].schema.url == 'https://www.ecologie.gouv.fr/sites/default/files/R%C3%A9glementation%20IRVE.pdf'
          resources_by_title = { resource['title']: resource for resource in datasets['2'].resources }

          # Unknown schema are kept as they were provided
@@ -630,7 +614,7 @@ class DcatBackendTest:


  @pytest.mark.usefixtures('clean_db')
- @pytest.mark.options(PLUGINS=['csw'])
+ @pytest.mark.options(PLUGINS=['csw-dcat'])
  class CswDcatBackendTest:

      def test_geonetworkv4(self, rmock):
@@ -678,60 +662,3 @@ class CswDcatBackendTest:

          assert 'User-Agent' in get_mock.last_request.headers
          assert get_mock.last_request.headers['User-Agent'] == 'uData/0.1 csw-dcat'
-
-
- @pytest.mark.usefixtures('clean_db')
- @pytest.mark.options(PLUGINS=['csw'])
- class CswIso19139DcatBackendTest:
-
-     def test_geo2france(self, rmock):
-
-         with open(os.path.join(CSW_DCAT_FILES_DIR, "XSLT.xml"), "r") as f:
-             xslt = f.read()
-         url = mock_csw_pagination(rmock, 'geonetwork/srv/eng/csw.rdf', 'geonetwork-iso-page-{}.xml')
-         rmock.get(CswIso19139DcatBackend.XSL_URL, text=xslt)
-         org = OrganizationFactory()
-         source = HarvestSourceFactory(backend='csw-iso-19139',
-                                       url=url,
-                                       organization=org)
-
-         actions.run(source.slug)
-
-         source.reload()
-
-         job = source.get_last_job()
-         assert len(job.items) == 6
-
-         datasets = {d.harvest.dct_identifier: d for d in Dataset.objects}
-
-         assert len(datasets) == 6
-
-         # First dataset
-         # dataset identifier is gmd:RS_Identifier > gmd:codeSpace + gmd:code
-         dataset = datasets['http://catalogue.geo-ide.developpement-durable.gouv.fr/fr-120066022-orphan-residentifier-140d31c6-643d-42a9-85df-2737a118e144']
-         assert dataset.title == "Plan local d'urbanisme de la commune de Cartigny"
-         assert dataset.description == "Le présent standard de données COVADIS concerne les documents de plans locaux d'urbanisme (PLU) et les plans d'occupation des sols (POS qui valent PLU)."
-         assert set(dataset.tags) == set([
-             'amenagement-urbanisme-zonages-planification', 'cartigny',
-             'document-durbanisme', 'donnees-ouvertes', 'plu', 'usage-des-sols'
-         ])
-         assert dataset.harvest.created_at.date() == date(2017, 10, 7)
-         assert dataset.spatial.geom == {'type': 'MultiPolygon', 'coordinates':
-             [[[[3.28133559, 50.48188019], [1.31279111, 50.48188019], [1.31279111, 49.38547516], [3.28133559, 49.38547516], [3.28133559, 50.48188019]]]]
-         }
-         assert dataset.contact_point.name == 'DDTM 80 (Direction Départementale des Territoires et de la Mer de la Somme)'
-         assert dataset.contact_point.email == 'ddtm-sap-bsig@somme.gouv.fr'
-
-         # License is not properly mapped in XSLT conversion
-         assert dataset.license is None
-
-         # Distributions don't get properly mapped to distribution with this XSLT if missing CI_OnLineFunctionCode.
-         # A CI_OnLineFunctionCode was added explicitely on one of the Online Resources.
-         # (See mapping at: https://semiceu.github.io/GeoDCAT-AP/releases/2.0.0/#resource-locator---on-line-resource)
-         assert len(dataset.resources) == 1
-         resource = dataset.resources[0]
-         assert resource.title == 'Téléchargement direct du lot et des documents associés'
-         assert resource.url == 'http://atom.geo-ide.developpement-durable.gouv.fr/atomArchive/GetResource?id=fr-120066022-ldd-cab63273-b3ae-4e8a-ae1c-6192e45faa94&datasetAggregate=true'
-
-         # Sadly resource format is parsed as a blank node. Format parsing should be improved.
-         assert re.match(r'n[0-9a-f]{32}', resource.format)
udata/rdf.py CHANGED
@@ -12,7 +12,6 @@ from rdflib.namespace import (
      Namespace, NamespaceManager, DCTERMS, SKOS, FOAF, XSD, RDFS, RDF
  )
  from rdflib.util import SUFFIX_FORMAT_MAP, guess_format as raw_guess_format
- from udata import uris
  from udata.models import FieldValidationError, Schema, ResourceSchema
  from mongoengine import ValidationError

@@ -76,7 +75,6 @@ ACCEPTED_MIME_TYPES = {
      'application/ld+json': 'json-ld',
      'application/json': 'json-ld',
      'application/trig': 'trig',
-     'text/xml': 'xml',
      # Available but not activated
      # 'application/n-quads': 'nquads',
      # 'text/xml': 'trix',
@@ -238,26 +236,7 @@ def schema_from_rdf(rdf):
      if isinstance(resource, (URIRef, Literal)):
          schema.url = resource.toPython()
      elif isinstance(resource, RdfResource):
-         # We try to get the schema "correct" URL.
-         # 1. The identifier of the DCT.conformsTo
-         # 2. The DCT.type inside the DCT.conformsTo (from some example it's the most precise one)
-         # (other not currently used RDF.type)
-         url = None
-         try:
-             url = uris.validate(resource.identifier.toPython())
-         except uris.ValidationError:
-             try:
-                 type = resource.value(DCT.type)
-                 if type is not None:
-                     url = uris.validate(type.identifier.toPython())
-             except uris.ValidationError:
-                 pass
-             pass
-
-         if url is None:
-             return None
-
-         schema.url = url
+         schema.url = resource.identifier.toPython()
          schema.name = resource.value(DCT.title)
      else:
          return None
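
For readers skimming the rdf.py hunk above: the rewritten elif branch no longer round-trips the DCT.conformsTo node through udata.uris.validate(); whatever identifier the node carries is stored as the schema URL, and its DCT.title (if any) as the schema name. The snippet below is a minimal, self-contained sketch of that extraction using plain rdflib; the ExampleSchema dataclass, the sample graph and the schema_from_conforms_to helper are illustrative stand-ins, not udata code.

from dataclasses import dataclass
from typing import Optional

from rdflib import Graph, Literal, URIRef
from rdflib.namespace import DCTERMS as DCT
from rdflib.resource import Resource as RdfResource


@dataclass
class ExampleSchema:
    """Illustrative stand-in for udata's Schema model."""
    url: Optional[str] = None
    name: Optional[str] = None


def schema_from_conforms_to(resource):
    """Mirror of the simplified branch: keep whatever URL the node carries."""
    schema = ExampleSchema()
    if isinstance(resource, (URIRef, Literal)):
        # Plain URI or literal: use its string value directly.
        schema.url = resource.toPython()
    elif isinstance(resource, RdfResource):
        # Full resource node: identifier becomes the URL, dct:title the name.
        # There is no uris.validate() round-trip anymore.
        schema.url = resource.identifier.toPython()
        schema.name = resource.value(DCT.title)
    else:
        return None
    return schema


# Sample dct:conformsTo node pointing at a documentation PDF with a title.
g = Graph()
node = URIRef('https://www.ecologie.gouv.fr/sites/default/files/R%C3%A9glementation%20IRVE.pdf')
g.add((node, DCT.title, Literal('Réglementation IRVE')))
print(schema_from_conforms_to(RdfResource(g, node)))
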
udata/search/__init__.py CHANGED
@@ -24,7 +24,7 @@ def reindex(classname, id):
      document = adapter_class.serialize(obj)
      if adapter_class.is_indexable(obj):
          log.info('Indexing %s (%s)', model.__name__, obj.id)
-         url = f"{current_app.config['SEARCH_SERVICE_API_URL']}{adapter_class.search_url}index"
+         url = f"{current_app.config['SEARCH_SERVICE_API_URL']}{adapter_class.search_url}/index"
          try:
              payload = {
                  'document': document
@@ -35,7 +35,7 @@ def reindex(classname, id):
              log.exception('Unable to index/unindex %s "%s"', model.__name__, str(obj.id))
      else:
          log.info('Unindexing %s (%s)', model.__name__, obj.id)
-         url = f"{current_app.config['SEARCH_SERVICE_API_URL']}{adapter_class.search_url}{str(obj.id)}/unindex"
+         url = f"{current_app.config['SEARCH_SERVICE_API_URL']}{adapter_class.search_url}/{str(obj.id)}/unindex"
          try:
              r = requests.delete(url)
              if r.status_code == 404:
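
The two search hunks make the same one-character fix: the index and unindex endpoints are now built with an explicit '/' between the adapter's search_url and the trailing path segment. A small sketch of the resulting URLs, with purely illustrative values for the config entry and adapter attributes:

# Illustrative values; the real ones come from current_app.config and the
# model's search adapter (SEARCH_SERVICE_API_URL, adapter_class.search_url).
SEARCH_SERVICE_API_URL = 'https://search.example.com/api/1/'
search_url = 'datasets'          # assumed here to have no trailing slash
obj_id = '66b0c3f2a9d4e5f6a7b8c9d0'

# Before the patch: no separator, so the segments run together.
assert f"{SEARCH_SERVICE_API_URL}{search_url}index" == \
    'https://search.example.com/api/1/datasetsindex'

# After the patch: '/index' and '/{id}/unindex' are proper path segments.
assert f"{SEARCH_SERVICE_API_URL}{search_url}/index" == \
    'https://search.example.com/api/1/datasets/index'
assert f"{SEARCH_SERVICE_API_URL}{search_url}/{obj_id}/unindex" == \
    f'https://search.example.com/api/1/datasets/{obj_id}/unindex'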