udata 10.8.3.dev37131__py2.py3-none-any.whl → 10.8.3.dev37170__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of udata might be problematic. Click here for more details.
- udata/harvest/backends/dcat.py +8 -3
- udata/harvest/tests/dcat/catalog.xml +1 -0
- udata/harvest/tests/test_dcat_backend.py +18 -5
- {udata-10.8.3.dev37131.dist-info → udata-10.8.3.dev37170.dist-info}/METADATA +4 -2
- {udata-10.8.3.dev37131.dist-info → udata-10.8.3.dev37170.dist-info}/RECORD +9 -9
- {udata-10.8.3.dev37131.dist-info → udata-10.8.3.dev37170.dist-info}/LICENSE +0 -0
- {udata-10.8.3.dev37131.dist-info → udata-10.8.3.dev37170.dist-info}/WHEEL +0 -0
- {udata-10.8.3.dev37131.dist-info → udata-10.8.3.dev37170.dist-info}/entry_points.txt +0 -0
- {udata-10.8.3.dev37131.dist-info → udata-10.8.3.dev37170.dist-info}/top_level.txt +0 -0
udata/harvest/backends/dcat.py
CHANGED
|
@@ -72,16 +72,16 @@ class DcatBackend(BaseBackend):
|
|
|
72
72
|
fmt = self.get_format()
|
|
73
73
|
self.job.data = {"format": fmt}
|
|
74
74
|
|
|
75
|
-
|
|
75
|
+
pages = []
|
|
76
76
|
|
|
77
77
|
for page_number, page in self.walk_graph(self.source.url, fmt):
|
|
78
78
|
self.process_one_datasets_page(page_number, page)
|
|
79
|
-
|
|
79
|
+
pages.append((page_number, page))
|
|
80
80
|
|
|
81
81
|
# We do a second pass to have all datasets in memory and attach datasets
|
|
82
82
|
# to dataservices. It could be better to be one pass of graph walking and
|
|
83
83
|
# then one pass of attaching datasets to dataservices.
|
|
84
|
-
for page_number, page in self.walk_graph(self.source.url, fmt):
|
|
84
|
+
for page_number, page in pages:
|
|
85
85
|
self.process_one_dataservices_page(page_number, page)
|
|
86
86
|
|
|
87
87
|
if not self.dryrun and self.has_reached_max_items():
|
|
@@ -100,6 +100,8 @@ class DcatBackend(BaseBackend):
|
|
|
100
100
|
|
|
101
101
|
bucket = current_app.config.get("HARVEST_GRAPHS_S3_BUCKET")
|
|
102
102
|
|
|
103
|
+
serialized_graphs = [p.serialize(format=fmt, indent=None) for _, p in pages]
|
|
104
|
+
|
|
103
105
|
if (
|
|
104
106
|
bucket is not None
|
|
105
107
|
and sum([len(g.encode("utf-8")) for g in serialized_graphs])
|
|
@@ -202,7 +204,10 @@ class DcatBackend(BaseBackend):
|
|
|
202
204
|
)
|
|
203
205
|
|
|
204
206
|
def process_one_dataservices_page(self, page_number: int, page: Graph):
|
|
207
|
+
access_services = {o for _, _, o in page.triples((None, DCAT.accessService, None))}
|
|
205
208
|
for node in page.subjects(RDF.type, DCAT.DataService):
|
|
209
|
+
if node in access_services:
|
|
210
|
+
continue
|
|
206
211
|
remote_id = page.value(node, DCT.identifier)
|
|
207
212
|
self.process_dataservice(remote_id, page_number=page_number, page=page, node=node)
|
|
208
213
|
|
|
@@ -159,6 +159,7 @@
|
|
|
159
159
|
<dcat:accessURL>http://data.test.org/datasets/1/resources/4/services?SERVICE=WMS&REQUEST=GetCapabilities&VERSION=1.3.0</dcat:accessURL>
|
|
160
160
|
<dcat:accessService>
|
|
161
161
|
<dcat:DataService>
|
|
162
|
+
<rdf:type rdf:resource="http://www.w3.org/ns/dcat#DataService"/>
|
|
162
163
|
<dcterms:title xml:lang="fr">Geo Service</dcterms:title>
|
|
163
164
|
<dcat:endpointURL rdf:resource="http://data.test.org/datasets/1/resources/4/services"/>
|
|
164
165
|
<dcat:endpointDescription rdf:resource="http://data.test.org/datasets/1/resources/4/services?SERVICE=WMS&REQUEST=GetCapabilities&VERSION=1.3.0"/>
|
|
@@ -187,6 +187,23 @@ class DcatBackendTest:
|
|
|
187
187
|
== "https://data.paris2024.org/api/explore/v2.1/console"
|
|
188
188
|
)
|
|
189
189
|
|
|
190
|
+
def test_harvest_dataservices_ignore_accessservices(self, rmock):
|
|
191
|
+
rmock.get("https://example.com/schemas", json=ResourceSchemaMockData.get_mock_data())
|
|
192
|
+
|
|
193
|
+
url = mock_dcat(rmock, "catalog.xml")
|
|
194
|
+
org = OrganizationFactory()
|
|
195
|
+
source = HarvestSourceFactory(backend="dcat", url=url, organization=org)
|
|
196
|
+
|
|
197
|
+
actions.run(source)
|
|
198
|
+
|
|
199
|
+
source.reload()
|
|
200
|
+
|
|
201
|
+
job = source.get_last_job()
|
|
202
|
+
assert len(job.items) == 4
|
|
203
|
+
|
|
204
|
+
dataservices = Dataservice.objects
|
|
205
|
+
assert len(dataservices) == 0
|
|
206
|
+
|
|
190
207
|
def test_harvest_literal_spatial(self, rmock):
|
|
191
208
|
url = mock_dcat(rmock, "evian.json")
|
|
192
209
|
org = OrganizationFactory()
|
|
@@ -478,12 +495,8 @@ class DcatBackendTest:
|
|
|
478
495
|
|
|
479
496
|
assert job.status == "done"
|
|
480
497
|
assert job.errors == []
|
|
481
|
-
assert len(job.items) == 5
|
|
482
|
-
# 4 datasets and one Dataservice mentionned but not described
|
|
483
|
-
# because it appears in a distribution as DCAT.accessService
|
|
484
|
-
# but is missing a proper DCT.identifier
|
|
498
|
+
assert len(job.items) == 4
|
|
485
499
|
assert len([item for item in job.items if item.status == "done"]) == 4
|
|
486
|
-
assert len([item for item in job.items if item.status == "skipped"]) == 1
|
|
487
500
|
|
|
488
501
|
def test_xml_catalog(self, rmock):
|
|
489
502
|
LicenseFactory(id="lov2", title="Licence Ouverte Version 2.0")
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: udata
|
|
3
|
-
Version: 10.8.3.dev37131
|
|
3
|
+
Version: 10.8.3.dev37170
|
|
4
4
|
Summary: Open data portal
|
|
5
5
|
Home-page: https://github.com/opendatateam/udata
|
|
6
6
|
Author: Opendata Team
|
|
@@ -145,7 +145,9 @@ It is collectively taken care of by members of the
|
|
|
145
145
|
|
|
146
146
|
- Count dataservices and reuses for datasets based on signal [#3335](https://github.com/opendatateam/udata/pull/3335)
|
|
147
147
|
:warning: the job `update-datasets-reuses-metrics` disappears, you should unschedule it before installing this version
|
|
148
|
-
|
|
148
|
+
- Cache dcat harvest pages to avoid two rounds of requests [#3398](https://github.com/opendatateam/udata/pull/3398)
|
|
149
|
+
- Ignore Dataset.accessService when processing DataService [#3399](https://github.com/opendatateam/udata/pull/3399)
|
|
150
|
+
|
|
149
151
|
## 10.8.2 (2025-07-31)
|
|
150
152
|
|
|
151
153
|
- Add integrity checks on non existent list field instead of empty [#3242](https://github.com/opendatateam/udata/pull/3242)
|
|
@@ -300,7 +300,7 @@ udata/harvest/signals.py,sha256=3AhFHMPIFH5vz01NX5ycR_RWH14MXFWnCT6__LSa-QI,1338
|
|
|
300
300
|
udata/harvest/tasks.py,sha256=id5YmHIhnkgez0LVC1fNg_6Yz1Sp0jrvQ1caslVOWdY,1722
|
|
301
301
|
udata/harvest/backends/__init__.py,sha256=QjoFfBJfpw_xgk5YYWI1SgKJOMEmTMlxSfW79GNkSTI,459
|
|
302
302
|
udata/harvest/backends/base.py,sha256=2wyfw83e3xGQcHnQI-z26g1dg-uVtWcDgzsBk7iGX3Y,17480
|
|
303
|
-
udata/harvest/backends/dcat.py,sha256=
|
|
303
|
+
udata/harvest/backends/dcat.py,sha256=aBdCiKcmZoDmjsmZVP_S1DW5MO0h1Um8Zqj8knJwN-k,18804
|
|
304
304
|
udata/harvest/backends/maaf.py,sha256=N7ty8ZWO9pfKPtZRk1wTaJ5pY6qi-0-GtF1p8jiYiY4,8102
|
|
305
305
|
udata/harvest/backends/maaf.xsd,sha256=vEyG8Vqw7Yn_acjRdXjqUJgxOj4pv8bibep-FX-f3BQ,18322
|
|
306
306
|
udata/harvest/backends/ckan/__init__.py,sha256=JE7Qa7kX7Yd8OvmJnAO_NupZe0tqYyhhkgJ-iGNxX64,35
|
|
@@ -314,7 +314,7 @@ udata/harvest/tests/person.jsonld,sha256=I7Ynh-PQlNeD51I1LrCgYOEjhL-WBeb65xzIE_s
|
|
|
314
314
|
udata/harvest/tests/test_actions.py,sha256=oXNTwDuSHtlVANRcsNyq3J33u4XU_jM9o67FbZjLCys,24860
|
|
315
315
|
udata/harvest/tests/test_api.py,sha256=gSuICkPy3KVRUhHAyudXVf_gLwiB7SoriUp3DLXWDdA,21611
|
|
316
316
|
udata/harvest/tests/test_base_backend.py,sha256=ow8ecGtD836mUqyPWYjkS5nx0STyT5RMLgBdDyOhts4,19233
|
|
317
|
-
udata/harvest/tests/test_dcat_backend.py,sha256=
|
|
317
|
+
udata/harvest/tests/test_dcat_backend.py,sha256=xe8X4Q88-Y50IQYT3Q9lz2rcFeb2ADIg1wLz-I7-ewc,43835
|
|
318
318
|
udata/harvest/tests/test_filters.py,sha256=PT2qopEIoXsqi8MsNDRuhNH7jGXiQo8r0uJrCOUd4aM,2465
|
|
319
319
|
udata/harvest/tests/test_models.py,sha256=f9NRR2_S4oZFgF8qOumg0vv-lpnEBJbI5vNtcwFdSqM,831
|
|
320
320
|
udata/harvest/tests/test_notifications.py,sha256=MMzTzkv-GXMNFeOwAi31rdTsAXyLCLOSna41zOtaJG0,816
|
|
@@ -335,7 +335,7 @@ udata/harvest/tests/csw_dcat/geonetworkv4-page-3.xml,sha256=fsN0E4TVd_ts-sYA612y
|
|
|
335
335
|
udata/harvest/tests/csw_dcat/geonetworkv4-page-5.xml,sha256=0VmPp1kspik7YAmOFyr-3yJLzWGA6kuQp_x_w-W385o,21213
|
|
336
336
|
udata/harvest/tests/dcat/bnodes.jsonld,sha256=Leqny-ccp30564yojQYYckw_HKbhR0f5qUCaavc2ruE,7964
|
|
337
337
|
udata/harvest/tests/dcat/bnodes.xml,sha256=bjG-pE2jDuJ7ZNDzQV4JEiMeAHCeX5eMQyUcDecVQ08,11333
|
|
338
|
-
udata/harvest/tests/dcat/catalog.xml,sha256=
|
|
338
|
+
udata/harvest/tests/dcat/catalog.xml,sha256=1tfktnFDEcJrJWSj_yt0zZVE5j8p-kiUanW9V-oxOrs,12412
|
|
339
339
|
udata/harvest/tests/dcat/evian.json,sha256=R3RxP5azUuf9aZ9fU7n6iJkfbJ6oj-Zej2cjOtkYr8M,16647
|
|
340
340
|
udata/harvest/tests/dcat/flat.jsonld,sha256=BAw08MDhtW9Px3q6RAoTIqO_OwJmAwBS9EpC8BY_x98,8459
|
|
341
341
|
udata/harvest/tests/dcat/geonetwork.xml,sha256=9_pksE74Zzkbgs9okj6hEbo8CJS0FZjEnIdvopKfm7k,7928
|
|
@@ -758,9 +758,9 @@ udata/translations/pt/LC_MESSAGES/udata.mo,sha256=ogtEGsjwIkOEtzrI49e57izi-NUim5
|
|
|
758
758
|
udata/translations/pt/LC_MESSAGES/udata.po,sha256=1omSlqdMEdwYtzHGalXrtptVk14mlb6JS_8xR8BsznU,48428
|
|
759
759
|
udata/translations/sr/LC_MESSAGES/udata.mo,sha256=V7unDjO31PNLiRhfEkYU3pU47wWWct-otdL0GDma0S4,28692
|
|
760
760
|
udata/translations/sr/LC_MESSAGES/udata.po,sha256=wVBipfpPl-DmYeVm5wrlzWWM3oEvuruDVbJHx4I1p8s,55170
|
|
761
|
-
udata-10.8.3.dev37131.dist-info/LICENSE,sha256=V8j_M8nAz8PvAOZQocyRDX7keai8UJ9skgmnwqETmdY,34520
|
|
762
|
-
udata-10.8.3.
|
|
763
|
-
udata-10.8.3.dev37131.dist-info/WHEEL,sha256=Kh9pAotZVRFj97E15yTA4iADqXdQfIVTHcNaZTjxeGM,110
|
|
764
|
-
udata-10.8.3.dev37131.dist-info/entry_points.txt,sha256=v2u12qO11i2lyLNIp136WmLJ-NHT-Kew3Duu8J-AXPM,614
|
|
765
|
-
udata-10.8.3.dev37131.dist-info/top_level.txt,sha256=39OCg-VWFWOq4gCKnjKNu-s3OwFlZIu_dVH8Gl6ndHw,12
|
|
766
|
-
udata-10.8.3.dev37131.dist-info/RECORD,,
|
|
761
|
+
udata-10.8.3.dev37170.dist-info/LICENSE,sha256=V8j_M8nAz8PvAOZQocyRDX7keai8UJ9skgmnwqETmdY,34520
|
|
762
|
+
udata-10.8.3.dev37170.dist-info/METADATA,sha256=TOsb22KX4silyxYmNmOHWZPoYAIP54nZn2ZKsijar_k,153533
|
|
763
|
+
udata-10.8.3.dev37170.dist-info/WHEEL,sha256=Kh9pAotZVRFj97E15yTA4iADqXdQfIVTHcNaZTjxeGM,110
|
|
764
|
+
udata-10.8.3.dev37170.dist-info/entry_points.txt,sha256=v2u12qO11i2lyLNIp136WmLJ-NHT-Kew3Duu8J-AXPM,614
|
|
765
|
+
udata-10.8.3.dev37170.dist-info/top_level.txt,sha256=39OCg-VWFWOq4gCKnjKNu-s3OwFlZIu_dVH8Gl6ndHw,12
|
|
766
|
+
udata-10.8.3.dev37170.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|