udata 10.8.3.dev37131__py2.py3-none-any.whl → 10.8.3.dev37170__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of udata might be problematic. Click here for more details.

@@ -72,16 +72,16 @@ class DcatBackend(BaseBackend):
72
72
  fmt = self.get_format()
73
73
  self.job.data = {"format": fmt}
74
74
 
75
- serialized_graphs = []
75
+ pages = []
76
76
 
77
77
  for page_number, page in self.walk_graph(self.source.url, fmt):
78
78
  self.process_one_datasets_page(page_number, page)
79
- serialized_graphs.append(page.serialize(format=fmt, indent=None))
79
+ pages.append((page_number, page))
80
80
 
81
81
  # We do a second pass to have all datasets in memory and attach datasets
82
82
  # to dataservices. It would be better to do one pass of graph walking and
83
83
  # then one pass of attaching datasets to dataservices.
84
- for page_number, page in self.walk_graph(self.source.url, fmt):
84
+ for page_number, page in pages:
85
85
  self.process_one_dataservices_page(page_number, page)
86
86
 
87
87
  if not self.dryrun and self.has_reached_max_items():
@@ -100,6 +100,8 @@ class DcatBackend(BaseBackend):
100
100
 
101
101
  bucket = current_app.config.get("HARVEST_GRAPHS_S3_BUCKET")
102
102
 
103
+ serialized_graphs = [p.serialize(format=fmt, indent=None) for _, p in pages]
104
+
103
105
  if (
104
106
  bucket is not None
105
107
  and sum([len(g.encode("utf-8")) for g in serialized_graphs])
@@ -202,7 +204,10 @@ class DcatBackend(BaseBackend):
202
204
  )
203
205
 
204
206
  def process_one_dataservices_page(self, page_number: int, page: Graph):
207
+ access_services = {o for _, _, o in page.triples((None, DCAT.accessService, None))}
205
208
  for node in page.subjects(RDF.type, DCAT.DataService):
209
+ if node in access_services:
210
+ continue
206
211
  remote_id = page.value(node, DCT.identifier)
207
212
  self.process_dataservice(remote_id, page_number=page_number, page=page, node=node)
208
213
 
@@ -159,6 +159,7 @@
159
159
  <dcat:accessURL>http://data.test.org/datasets/1/resources/4/services?SERVICE=WMS&amp;REQUEST=GetCapabilities&amp;VERSION=1.3.0</dcat:accessURL>
160
160
  <dcat:accessService>
161
161
  <dcat:DataService>
162
+ <rdf:type rdf:resource="http://www.w3.org/ns/dcat#DataService"/>
162
163
  <dcterms:title xml:lang="fr">Geo Service</dcterms:title>
163
164
  <dcat:endpointURL rdf:resource="http://data.test.org/datasets/1/resources/4/services"/>
164
165
  <dcat:endpointDescription rdf:resource="http://data.test.org/datasets/1/resources/4/services?SERVICE=WMS&amp;REQUEST=GetCapabilities&amp;VERSION=1.3.0"/>
@@ -187,6 +187,23 @@ class DcatBackendTest:
187
187
  == "https://data.paris2024.org/api/explore/v2.1/console"
188
188
  )
189
189
 
190
+ def test_harvest_dataservices_ignore_accessservices(self, rmock):
191
+ rmock.get("https://example.com/schemas", json=ResourceSchemaMockData.get_mock_data())
192
+
193
+ url = mock_dcat(rmock, "catalog.xml")
194
+ org = OrganizationFactory()
195
+ source = HarvestSourceFactory(backend="dcat", url=url, organization=org)
196
+
197
+ actions.run(source)
198
+
199
+ source.reload()
200
+
201
+ job = source.get_last_job()
202
+ assert len(job.items) == 4
203
+
204
+ dataservices = Dataservice.objects
205
+ assert len(dataservices) == 0
206
+
190
207
  def test_harvest_literal_spatial(self, rmock):
191
208
  url = mock_dcat(rmock, "evian.json")
192
209
  org = OrganizationFactory()
@@ -478,12 +495,8 @@ class DcatBackendTest:
478
495
 
479
496
  assert job.status == "done"
480
497
  assert job.errors == []
481
- assert len(job.items) == 5
482
- # 4 datasets and one Dataservice mentioned but not described
483
- # because it appears in a distribution as DCAT.accessService
484
- # but is missing a proper DCT.identifier
498
+ assert len(job.items) == 4
485
499
  assert len([item for item in job.items if item.status == "done"]) == 4
486
- assert len([item for item in job.items if item.status == "skipped"]) == 1
487
500
 
488
501
  def test_xml_catalog(self, rmock):
489
502
  LicenseFactory(id="lov2", title="Licence Ouverte Version 2.0")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: udata
3
- Version: 10.8.3.dev37131
3
+ Version: 10.8.3.dev37170
4
4
  Summary: Open data portal
5
5
  Home-page: https://github.com/opendatateam/udata
6
6
  Author: Opendata Team
@@ -145,7 +145,9 @@ It is collectively taken care of by members of the
145
145
 
146
146
  - Count dataservices and reuses for datasets based on signal [#3335](https://github.com/opendatateam/udata/pull/3335)
147
147
  :warning: the job `update-datasets-reuses-metrics` disappears, you should unschedule it before installing this version
148
-
148
+ - Cache dcat harvest pages to avoid two rounds of requests [#3398](https://github.com/opendatateam/udata/pull/3398)
149
+ - Ignore Dataset.accessService when processing DataService [#3399](https://github.com/opendatateam/udata/pull/3399)
150
+
149
151
  ## 10.8.2 (2025-07-31)
150
152
 
151
153
  - Add integrity checks on non existent list field instead of empty [#3242](https://github.com/opendatateam/udata/pull/3242)
@@ -300,7 +300,7 @@ udata/harvest/signals.py,sha256=3AhFHMPIFH5vz01NX5ycR_RWH14MXFWnCT6__LSa-QI,1338
300
300
  udata/harvest/tasks.py,sha256=id5YmHIhnkgez0LVC1fNg_6Yz1Sp0jrvQ1caslVOWdY,1722
301
301
  udata/harvest/backends/__init__.py,sha256=QjoFfBJfpw_xgk5YYWI1SgKJOMEmTMlxSfW79GNkSTI,459
302
302
  udata/harvest/backends/base.py,sha256=2wyfw83e3xGQcHnQI-z26g1dg-uVtWcDgzsBk7iGX3Y,17480
303
- udata/harvest/backends/dcat.py,sha256=IfUmqHy68Tt30E_kn4riExfgb-d7qPa--7KjCLCsia0,18637
303
+ udata/harvest/backends/dcat.py,sha256=aBdCiKcmZoDmjsmZVP_S1DW5MO0h1Um8Zqj8knJwN-k,18804
304
304
  udata/harvest/backends/maaf.py,sha256=N7ty8ZWO9pfKPtZRk1wTaJ5pY6qi-0-GtF1p8jiYiY4,8102
305
305
  udata/harvest/backends/maaf.xsd,sha256=vEyG8Vqw7Yn_acjRdXjqUJgxOj4pv8bibep-FX-f3BQ,18322
306
306
  udata/harvest/backends/ckan/__init__.py,sha256=JE7Qa7kX7Yd8OvmJnAO_NupZe0tqYyhhkgJ-iGNxX64,35
@@ -314,7 +314,7 @@ udata/harvest/tests/person.jsonld,sha256=I7Ynh-PQlNeD51I1LrCgYOEjhL-WBeb65xzIE_s
314
314
  udata/harvest/tests/test_actions.py,sha256=oXNTwDuSHtlVANRcsNyq3J33u4XU_jM9o67FbZjLCys,24860
315
315
  udata/harvest/tests/test_api.py,sha256=gSuICkPy3KVRUhHAyudXVf_gLwiB7SoriUp3DLXWDdA,21611
316
316
  udata/harvest/tests/test_base_backend.py,sha256=ow8ecGtD836mUqyPWYjkS5nx0STyT5RMLgBdDyOhts4,19233
317
- udata/harvest/tests/test_dcat_backend.py,sha256=6CKQmhlPf5Ceipj6Nj0qKb-myb48f003kwlgIvHQAi8,43568
317
+ udata/harvest/tests/test_dcat_backend.py,sha256=xe8X4Q88-Y50IQYT3Q9lz2rcFeb2ADIg1wLz-I7-ewc,43835
318
318
  udata/harvest/tests/test_filters.py,sha256=PT2qopEIoXsqi8MsNDRuhNH7jGXiQo8r0uJrCOUd4aM,2465
319
319
  udata/harvest/tests/test_models.py,sha256=f9NRR2_S4oZFgF8qOumg0vv-lpnEBJbI5vNtcwFdSqM,831
320
320
  udata/harvest/tests/test_notifications.py,sha256=MMzTzkv-GXMNFeOwAi31rdTsAXyLCLOSna41zOtaJG0,816
@@ -335,7 +335,7 @@ udata/harvest/tests/csw_dcat/geonetworkv4-page-3.xml,sha256=fsN0E4TVd_ts-sYA612y
335
335
  udata/harvest/tests/csw_dcat/geonetworkv4-page-5.xml,sha256=0VmPp1kspik7YAmOFyr-3yJLzWGA6kuQp_x_w-W385o,21213
336
336
  udata/harvest/tests/dcat/bnodes.jsonld,sha256=Leqny-ccp30564yojQYYckw_HKbhR0f5qUCaavc2ruE,7964
337
337
  udata/harvest/tests/dcat/bnodes.xml,sha256=bjG-pE2jDuJ7ZNDzQV4JEiMeAHCeX5eMQyUcDecVQ08,11333
338
- udata/harvest/tests/dcat/catalog.xml,sha256=ZBBH3JntmnmK9F9tGABvlfoXU4gBPOJTWL4942mhsto,12339
338
+ udata/harvest/tests/dcat/catalog.xml,sha256=1tfktnFDEcJrJWSj_yt0zZVE5j8p-kiUanW9V-oxOrs,12412
339
339
  udata/harvest/tests/dcat/evian.json,sha256=R3RxP5azUuf9aZ9fU7n6iJkfbJ6oj-Zej2cjOtkYr8M,16647
340
340
  udata/harvest/tests/dcat/flat.jsonld,sha256=BAw08MDhtW9Px3q6RAoTIqO_OwJmAwBS9EpC8BY_x98,8459
341
341
  udata/harvest/tests/dcat/geonetwork.xml,sha256=9_pksE74Zzkbgs9okj6hEbo8CJS0FZjEnIdvopKfm7k,7928
@@ -758,9 +758,9 @@ udata/translations/pt/LC_MESSAGES/udata.mo,sha256=ogtEGsjwIkOEtzrI49e57izi-NUim5
758
758
  udata/translations/pt/LC_MESSAGES/udata.po,sha256=1omSlqdMEdwYtzHGalXrtptVk14mlb6JS_8xR8BsznU,48428
759
759
  udata/translations/sr/LC_MESSAGES/udata.mo,sha256=V7unDjO31PNLiRhfEkYU3pU47wWWct-otdL0GDma0S4,28692
760
760
  udata/translations/sr/LC_MESSAGES/udata.po,sha256=wVBipfpPl-DmYeVm5wrlzWWM3oEvuruDVbJHx4I1p8s,55170
761
- udata-10.8.3.dev37131.dist-info/LICENSE,sha256=V8j_M8nAz8PvAOZQocyRDX7keai8UJ9skgmnwqETmdY,34520
762
- udata-10.8.3.dev37131.dist-info/METADATA,sha256=IxyytporWPEDZ9gAw6TzHusA2ienapaEBgen9d7R1m4,153302
763
- udata-10.8.3.dev37131.dist-info/WHEEL,sha256=Kh9pAotZVRFj97E15yTA4iADqXdQfIVTHcNaZTjxeGM,110
764
- udata-10.8.3.dev37131.dist-info/entry_points.txt,sha256=v2u12qO11i2lyLNIp136WmLJ-NHT-Kew3Duu8J-AXPM,614
765
- udata-10.8.3.dev37131.dist-info/top_level.txt,sha256=39OCg-VWFWOq4gCKnjKNu-s3OwFlZIu_dVH8Gl6ndHw,12
766
- udata-10.8.3.dev37131.dist-info/RECORD,,
761
+ udata-10.8.3.dev37170.dist-info/LICENSE,sha256=V8j_M8nAz8PvAOZQocyRDX7keai8UJ9skgmnwqETmdY,34520
762
+ udata-10.8.3.dev37170.dist-info/METADATA,sha256=TOsb22KX4silyxYmNmOHWZPoYAIP54nZn2ZKsijar_k,153533
763
+ udata-10.8.3.dev37170.dist-info/WHEEL,sha256=Kh9pAotZVRFj97E15yTA4iADqXdQfIVTHcNaZTjxeGM,110
764
+ udata-10.8.3.dev37170.dist-info/entry_points.txt,sha256=v2u12qO11i2lyLNIp136WmLJ-NHT-Kew3Duu8J-AXPM,614
765
+ udata-10.8.3.dev37170.dist-info/top_level.txt,sha256=39OCg-VWFWOq4gCKnjKNu-s3OwFlZIu_dVH8Gl6ndHw,12
766
+ udata-10.8.3.dev37170.dist-info/RECORD,,