udata 10.8.3.dev37170__py2.py3-none-any.whl → 10.8.3.dev37191__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of udata might be problematic. Click here for more details.

Files changed (31)
  1. udata/core/dataset/api_fields.py +2 -0
  2. udata/core/dataset/apiv2.py +4 -0
  3. udata/core/dataset/constants.py +1 -0
  4. udata/core/dataset/csv.py +1 -0
  5. udata/core/dataset/forms.py +6 -0
  6. udata/core/dataset/models.py +2 -0
  7. udata/harvest/backends/dcat.py +140 -161
  8. udata/harvest/tests/test_dcat_backend.py +1 -1
  9. udata/settings.py +1 -1
  10. udata/static/chunks/{10.8ca60413647062717b1e.js → 10.471164b2a9fe15614797.js} +3 -3
  11. udata/static/chunks/{10.8ca60413647062717b1e.js.map → 10.471164b2a9fe15614797.js.map} +1 -1
  12. udata/static/chunks/{11.b6f741fcc366abfad9c4.js → 11.51d706fb9521c16976bc.js} +3 -3
  13. udata/static/chunks/{11.b6f741fcc366abfad9c4.js.map → 11.51d706fb9521c16976bc.js.map} +1 -1
  14. udata/static/chunks/{13.2d06442dd9a05d9777b5.js → 13.f29411b06be1883356a3.js} +2 -2
  15. udata/static/chunks/{13.2d06442dd9a05d9777b5.js.map → 13.f29411b06be1883356a3.js.map} +1 -1
  16. udata/static/chunks/{17.e8e4caaad5cb0cc0bacc.js → 17.3bd0340930d4a314ce9c.js} +2 -2
  17. udata/static/chunks/{17.e8e4caaad5cb0cc0bacc.js.map → 17.3bd0340930d4a314ce9c.js.map} +1 -1
  18. udata/static/chunks/{19.f03a102365af4315f9db.js → 19.8da42e8359d72afc2618.js} +3 -3
  19. udata/static/chunks/{19.f03a102365af4315f9db.js.map → 19.8da42e8359d72afc2618.js.map} +1 -1
  20. udata/static/chunks/{8.778091d55cd8ea39af6b.js → 8.54e44b102164ae5e7a67.js} +2 -2
  21. udata/static/chunks/{8.778091d55cd8ea39af6b.js.map → 8.54e44b102164ae5e7a67.js.map} +1 -1
  22. udata/static/chunks/{9.033d7e190ca9e226a5d0.js → 9.07515e5187f475bce828.js} +3 -3
  23. udata/static/chunks/{9.033d7e190ca9e226a5d0.js.map → 9.07515e5187f475bce828.js.map} +1 -1
  24. udata/static/common.js +1 -1
  25. udata/static/common.js.map +1 -1
  26. {udata-10.8.3.dev37170.dist-info → udata-10.8.3.dev37191.dist-info}/METADATA +5 -2
  27. {udata-10.8.3.dev37170.dist-info → udata-10.8.3.dev37191.dist-info}/RECORD +31 -31
  28. {udata-10.8.3.dev37170.dist-info → udata-10.8.3.dev37191.dist-info}/LICENSE +0 -0
  29. {udata-10.8.3.dev37170.dist-info → udata-10.8.3.dev37191.dist-info}/WHEEL +0 -0
  30. {udata-10.8.3.dev37170.dist-info → udata-10.8.3.dev37191.dist-info}/entry_points.txt +0 -0
  31. {udata-10.8.3.dev37170.dist-info → udata-10.8.3.dev37191.dist-info}/top_level.txt +0 -0
udata/core/dataset/api_fields.py CHANGED
@@ -266,6 +266,7 @@ DEFAULT_MASK = ",".join(
266
266
  "acronym",
267
267
  "slug",
268
268
  "description",
269
+ "description_short",
269
270
  "created_at",
270
271
  "last_modified",
271
272
  "deleted",
@@ -327,6 +328,7 @@ dataset_fields = api.model(
327
328
  "description": fields.Markdown(
328
329
  description="The dataset description in markdown", required=True
329
330
  ),
331
+ "description_short": fields.String(description="The dataset short description"),
330
332
  "created_at": fields.ISODateTime(
331
333
  description="This date is computed between harvested creation date if any and site's internal creation date",
332
334
  required=True,
udata/core/dataset/apiv2.py CHANGED
@@ -44,6 +44,7 @@ DEFAULT_MASK_APIV2 = ",".join(
44
44
  "acronym",
45
45
  "slug",
46
46
  "description",
47
+ "description_short",
47
48
  "created_at",
48
49
  "last_modified",
49
50
  "deleted",
@@ -105,6 +106,9 @@ dataset_fields = apiv2.model(
105
106
  "description": fields.Markdown(
106
107
  description="The dataset description in markdown", required=True
107
108
  ),
109
+ "description_short": fields.String(
110
+ description="The dataset short description", required=False
111
+ ),
108
112
  "created_at": fields.ISODateTime(
109
113
  description="The dataset creation date", required=True, readonly=True
110
114
  ),
udata/core/dataset/constants.py CHANGED
@@ -89,5 +89,6 @@ SCHEMA_CACHE_DURATION = 60 * 5 # In seconds
89
89
 
90
90
  TITLE_SIZE_LIMIT = 350
91
91
  DESCRIPTION_SIZE_LIMIT = 100000
92
+ DESCRIPTION_SHORT_SIZE_LIMIT = 200
92
93
 
93
94
  FULL_OBJECTS_HEADER = "X-Get-Datasets-Full-Objects"
udata/core/dataset/csv.py CHANGED
@@ -26,6 +26,7 @@ class DatasetCsvAdapter(csv.Adapter):
26
26
  ("owner_id", "owner.id"),
27
27
  # 'contact_point', # ?
28
28
  "description",
29
+ "description_short",
29
30
  "frequency",
30
31
  "license",
31
32
  "temporal_coverage.start",
udata/core/dataset/forms.py CHANGED
@@ -7,6 +7,7 @@ from udata.mongo.errors import FieldValidationError
7
7
  from .constants import (
8
8
  CHECKSUM_TYPES,
9
9
  DEFAULT_FREQUENCY,
10
+ DESCRIPTION_SHORT_SIZE_LIMIT,
10
11
  DESCRIPTION_SIZE_LIMIT,
11
12
  LEGACY_FREQUENCIES,
12
13
  RESOURCE_FILETYPES,
@@ -151,6 +152,11 @@ class DatasetForm(ModelForm):
151
152
  [validators.DataRequired(), validators.Length(max=DESCRIPTION_SIZE_LIMIT)],
152
153
  description=_("The details about the dataset (collection process, specifics...)."),
153
154
  )
155
+ description_short = fields.StringField(
156
+ _("Short description"),
157
+ [validators.Length(max=DESCRIPTION_SHORT_SIZE_LIMIT)],
158
+ description=_("A short description of the dataset."),
159
+ )
154
160
  license = fields.ModelSelectField(_("License"), model=License, allow_blank=True)
155
161
  frequency = fields.SelectField(
156
162
  _("Update frequency"),
udata/core/dataset/models.py CHANGED
@@ -35,6 +35,7 @@ from .constants import (
35
35
  CHECKSUM_TYPES,
36
36
  CLOSED_FORMATS,
37
37
  DEFAULT_LICENSE,
38
+ DESCRIPTION_SHORT_SIZE_LIMIT,
38
39
  LEGACY_FREQUENCIES,
39
40
  MAX_DISTANCE,
40
41
  PIVOTAL_DATA,
@@ -560,6 +561,7 @@ class Dataset(Auditable, WithMetrics, DatasetBadgeMixin, Owned, Linkable, db.Doc
560
561
  auditable=False,
561
562
  )
562
563
  description = field(db.StringField(required=True, default=""))
564
+ description_short = field(db.StringField(max_length=DESCRIPTION_SHORT_SIZE_LIMIT))
563
565
  license = field(db.ReferenceField("License"))
564
566
 
565
567
  tags = field(db.TagListField())
udata/harvest/backends/dcat.py CHANGED
@@ -1,11 +1,12 @@
1
1
  import logging
2
2
  from datetime import date
3
- from typing import Generator
3
+ from typing import ClassVar, Generator
4
4
 
5
5
  import lxml.etree as ET
6
6
  from flask import current_app
7
7
  from rdflib import Graph
8
8
  from rdflib.namespace import RDF
9
+ from typing_extensions import override
9
10
 
10
11
  from udata.core.dataservices.rdf import dataservice_from_rdf
11
12
  from udata.core.dataset.rdf import dataset_from_rdf
@@ -55,9 +56,6 @@ URIS_TO_REPLACE = {
55
56
  }
56
57
 
57
58
 
58
- SAFE_PARSER = ET.XMLParser(resolve_entities=False)
59
-
60
-
61
59
  def extract_graph(source, target, node, specs):
62
60
  for p, o in source.predicate_objects(node):
63
61
  target.add((node, p, o))
@@ -240,104 +238,165 @@ class DcatBackend(BaseBackend):
240
238
  return node
241
239
  raise ValueError(f"Unable to find dataset with DCT.identifier:{item.remote_id}")
242
240
 
243
- def next_record_if_should_continue(self, start, search_results):
244
- next_record = int(search_results.attrib["nextRecord"])
245
- matched_count = int(search_results.attrib["numberOfRecordsMatched"])
246
- returned_count = int(search_results.attrib["numberOfRecordsReturned"])
247
241
 
248
- # Break conditions copied gratefully from
249
- # noqa https://github.com/geonetwork/core-geonetwork/blob/main/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/csw/Harvester.java#L338-L369
250
- break_conditions = (
251
- # standard CSW: A value of 0 means all records have been returned.
252
- next_record == 0,
253
- # Misbehaving CSW server returning a next record > matched count
254
- next_record > matched_count,
255
- # No results returned already
256
- returned_count == 0,
257
- # Current next record is lower than previous one
258
- next_record < start,
259
- # Enough items have been harvested already
260
- self.max_items and len(self.job.items) >= self.max_items,
261
- )
242
+ class CswDcatBackend(DcatBackend):
243
+ """
244
+ CSW harvester fetching records as DCAT.
245
+ The parsing of items is then the same as for the DcatBackend.
246
+ """
262
247
 
263
- if any(break_conditions):
264
- return None
265
- else:
266
- return next_record
248
+ display_name = "CSW-DCAT"
267
249
 
250
+ # CSW_REQUEST is based on:
251
+ # - Request syntax from spec [1] and example requests [1] [2].
252
+ # - Sort settings to ensure stable paging [3].
253
+ # - Filter settings to only retrieve record types currently mapped in udata.
254
+ #
255
+ # If you modify the request, make sure:
256
+ # - `typeNames` and `outputSchema` are consistent. You'll likely want to keep "gmd:MD_Metadata",
257
+ # since "csw:Record" contains less information.
258
+ # - `typeNames` and namespaces in `csw:Query` (`Filter`, `SortBy`, ...) are consistent, although
259
+ # they are ignored on some servers [4] [5].
260
+ # - It works on real catalogs! Not many servers implement the whole spec.
261
+ #
262
+ # References:
263
+ # [1] OpenGIS Catalogue Services Specification 2.0.2 – ISO Metadata Application Profile: Corrigendum
264
+ # https://portal.ogc.org/files/80534
265
+ # [2] GeoNetwork - CSW test requests
266
+ # https://github.com/geonetwork/core-geonetwork/tree/3.10.4/web/src/main/webapp/xml/csw/test
267
+ # [3] Udata - Support csw dcat harvest
268
+ # https://github.com/opendatateam/udata/pull/2800#discussion_r1129053500
269
+ # [4] GeoNetwork - GetRecords ignores namespaces for Filter/SortBy fields
270
+ # https://github.com/geonetwork/core-geonetwork/blob/3.10.4/csw-server/src/main/java/org/fao/geonet/kernel/csw/services/getrecords/FieldMapper.java#L92
271
+ # [5] GeoNetwork - GetRecords ignores `typeNames`
272
+ # https://github.com/geonetwork/core-geonetwork/blob/3.10.4/csw-server/src/main/java/org/fao/geonet/kernel/csw/services/getrecords/CatalogSearcher.java#L194
273
+ CSW_REQUEST: ClassVar[str] = """
274
+ <csw:GetRecords xmlns:apiso="http://www.opengis.net/cat/csw/apiso/1.0"
275
+ xmlns:csw="http://www.opengis.net/cat/csw/2.0.2"
276
+ xmlns:ogc="http://www.opengis.net/ogc"
277
+ service="CSW" version="2.0.2" outputFormat="application/xml"
278
+ resultType="results" startPosition="{start}" maxRecords="25"
279
+ outputSchema="{output_schema}">
280
+ <csw:Query typeNames="gmd:MD_Metadata">
281
+ <csw:ElementSetName>full</csw:ElementSetName>
282
+ <csw:Constraint version="1.1.0">
283
+ <ogc:Filter>
284
+ <ogc:Or>
285
+ <ogc:PropertyIsEqualTo>
286
+ <ogc:PropertyName>apiso:type</ogc:PropertyName>
287
+ <ogc:Literal>dataset</ogc:Literal>
288
+ </ogc:PropertyIsEqualTo>
289
+ <ogc:PropertyIsEqualTo>
290
+ <ogc:PropertyName>apiso:type</ogc:PropertyName>
291
+ <ogc:Literal>nonGeographicDataset</ogc:Literal>
292
+ </ogc:PropertyIsEqualTo>
293
+ <ogc:PropertyIsEqualTo>
294
+ <ogc:PropertyName>apiso:type</ogc:PropertyName>
295
+ <ogc:Literal>series</ogc:Literal>
296
+ </ogc:PropertyIsEqualTo>
297
+ <ogc:PropertyIsEqualTo>
298
+ <ogc:PropertyName>apiso:type</ogc:PropertyName>
299
+ <ogc:Literal>service</ogc:Literal>
300
+ </ogc:PropertyIsEqualTo>
301
+ </ogc:Or>
302
+ </ogc:Filter>
303
+ </csw:Constraint>
304
+ <ogc:SortBy>
305
+ <ogc:SortProperty>
306
+ <ogc:PropertyName>apiso:identifier</ogc:PropertyName>
307
+ <ogc:SortOrder>ASC</ogc:SortOrder>
308
+ </ogc:SortProperty>
309
+ </ogc:SortBy>
310
+ </csw:Query>
311
+ </csw:GetRecords>
312
+ """
268
313
 
269
- class CswDcatBackend(DcatBackend):
270
- display_name = "CSW-DCAT"
314
+ CSW_OUTPUT_SCHEMA = "http://www.w3.org/ns/dcat#"
271
315
 
272
- DCAT_SCHEMA = "http://www.w3.org/ns/dcat#"
316
+ def __init__(self, *args, **kwargs):
317
+ super().__init__(*args, **kwargs)
318
+ self.xml_parser = ET.XMLParser(resolve_entities=False)
273
319
 
274
320
  def walk_graph(self, url: str, fmt: str) -> Generator[tuple[int, Graph], None, None]:
275
321
  """
276
322
  Yield all RDF pages as `Graph` from the source
277
323
  """
278
- body = """<csw:GetRecords xmlns:csw="http://www.opengis.net/cat/csw/2.0.2"
279
- xmlns:gmd="http://www.isotc211.org/2005/gmd"
280
- service="CSW" version="2.0.2" resultType="results"
281
- startPosition="{start}" maxPosition="200"
282
- outputSchema="{schema}">
283
- <csw:Query typeNames="gmd:MD_Metadata">
284
- <csw:ElementSetName>full</csw:ElementSetName>
285
- <ogc:SortBy xmlns:ogc="http://www.opengis.net/ogc">
286
- <ogc:SortProperty>
287
- <ogc:PropertyName>identifier</ogc:PropertyName>
288
- <ogc:SortOrder>ASC</ogc:SortOrder>
289
- </ogc:SortProperty>
290
- </ogc:SortBy>
291
- </csw:Query>
292
- </csw:GetRecords>"""
293
- headers = {"Content-Type": "application/xml"}
294
-
295
324
  page_number = 0
296
325
  start = 1
297
326
 
298
- response = self.post(
299
- url, data=body.format(start=start, schema=self.DCAT_SCHEMA), headers=headers
300
- )
301
- response.raise_for_status()
302
- content = response.content
303
- tree = ET.fromstring(content, parser=SAFE_PARSER)
304
- if tree.tag == "{" + OWS_NAMESPACE + "}ExceptionReport":
305
- raise ValueError(f"Failed to query CSW:\n{content}")
306
- while tree is not None:
327
+ while True:
328
+ data = self.CSW_REQUEST.format(output_schema=self.CSW_OUTPUT_SCHEMA, start=start)
329
+ response = self.post(url, data=data, headers={"Content-Type": "application/xml"})
330
+ response.raise_for_status()
331
+
332
+ content = response.content
333
+ tree = ET.fromstring(content, parser=self.xml_parser)
334
+ if tree.tag == "{" + OWS_NAMESPACE + "}ExceptionReport":
335
+ raise ValueError(f"Failed to query CSW:\n{content}")
336
+
307
337
  search_results = tree.find("csw:SearchResults", {"csw": CSW_NAMESPACE})
308
- if search_results is None:
338
+ if not search_results:
309
339
  log.error(f"No search results found for {url} on page {page_number}")
310
- break
311
- for child in search_results:
340
+ return
341
+
342
+ for result in search_results:
312
343
  subgraph = Graph(namespace_manager=namespace_manager)
313
- subgraph.parse(data=ET.tostring(child), format=fmt)
344
+ doc = ET.tostring(self.as_dcat(result))
345
+ subgraph.parse(data=doc, format=fmt)
346
+
347
+ if not subgraph.subjects(
348
+ RDF.type, [DCAT.Dataset, DCAT.DatasetSeries, DCAT.DataService]
349
+ ):
350
+ raise ValueError("Failed to fetch CSW content")
314
351
 
315
352
  yield page_number, subgraph
353
+
316
354
  if self.has_reached_max_items():
317
355
  return
318
356
 
319
- next_record = self.next_record_if_should_continue(start, search_results)
320
- if not next_record:
321
- break
322
-
323
- start = next_record
324
357
  page_number += 1
358
+ start = self.next_position(start, search_results)
359
+ if not start:
360
+ return
325
361
 
326
- tree = ET.fromstring(
327
- self.post(
328
- url, data=body.format(start=start, schema=self.DCAT_SCHEMA), headers=headers
329
- ).content,
330
- parser=SAFE_PARSER,
331
- )
362
+ def as_dcat(self, tree: ET._Element) -> ET._Element:
363
+ """
364
+ Return the input tree as a DCAT tree.
365
+ For CswDcatBackend, this method returns the incoming tree as-is, since it's already DCAT.
366
+ For subclasses of CswDcatBackend, this method should convert the incoming tree to DCAT.
367
+ """
368
+ return tree
369
+
370
+ def next_position(self, start: int, search_results: ET._Element) -> int | None:
371
+ next_record = int(search_results.attrib["nextRecord"])
372
+ matched_count = int(search_results.attrib["numberOfRecordsMatched"])
373
+ returned_count = int(search_results.attrib["numberOfRecordsReturned"])
374
+
375
+ # Break conditions copied gratefully from
376
+ # noqa https://github.com/geonetwork/core-geonetwork/blob/main/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/csw/Harvester.java#L338-L369
377
+ should_break = (
378
+ # A value of 0 means all records have been returned (standard CSW)
379
+ (next_record == 0)
380
+ # Misbehaving CSW server returning a next record > matched count
381
+ or (next_record > matched_count)
382
+ # No results returned already
383
+ or (returned_count == 0)
384
+ # Current next record is lower than previous one
385
+ or (next_record < start)
386
+ # Enough items have been harvested already
387
+ or self.has_reached_max_items()
388
+ )
389
+ return None if should_break else next_record
332
390
 
333
391
 
334
- class CswIso19139DcatBackend(DcatBackend):
392
+ class CswIso19139DcatBackend(CswDcatBackend):
335
393
  """
336
- An harvester that takes CSW ISO 19139 as input and transforms it to DCAT using SEMIC GeoDCAT-AP XSLT.
394
+ CSW harvester fetching records as ISO-19139 and using XSLT to convert them to DCAT.
337
395
  The parsing of items is then the same as for the DcatBackend.
338
396
  """
339
397
 
340
398
  display_name = "CSW-ISO-19139"
399
+
341
400
  extra_configs = (
342
401
  HarvestExtraConfig(
343
402
  _("Remote URL prefix"),
@@ -347,94 +406,14 @@ class CswIso19139DcatBackend(DcatBackend):
347
406
  ),
348
407
  )
349
408
 
350
- ISO_SCHEMA = "http://www.isotc211.org/2005/gmd"
351
-
352
- def walk_graph(self, url: str, fmt: str) -> Generator[tuple[int, Graph], None, None]:
353
- """
354
- Yield all RDF pages as `Graph` from the source
355
-
356
- Parse CSW graph querying ISO schema.
357
- Use SEMIC GeoDCAT-AP XSLT to map it to a correct version.
358
- See https://github.com/SEMICeu/iso-19139-to-dcat-ap for more information on the XSLT.
359
- """
360
- # Load XSLT
361
- xsl_url = current_app.config["HARVEST_ISO19139_XSL_URL"]
362
- xsl = ET.fromstring(self.get(xsl_url).content, parser=SAFE_PARSER)
363
- transform = ET.XSLT(xsl)
364
-
365
- # Start querying and parsing graph
366
- # Filter on dataset or serie records
367
- body = """<csw:GetRecords xmlns:csw="http://www.opengis.net/cat/csw/2.0.2"
368
- xmlns:gmd="http://www.isotc211.org/2005/gmd"
369
- service="CSW" version="2.0.2" resultType="results"
370
- startPosition="{start}" maxPosition="10"
371
- outputSchema="{schema}">
372
- <csw:Query typeNames="csw:Record">
373
- <csw:ElementSetName>full</csw:ElementSetName>
374
- <csw:Constraint version="1.1.0">
375
- <ogc:Filter xmlns:ogc="http://www.opengis.net/ogc">
376
- <ogc:Or xmlns:ogc="http://www.opengis.net/ogc">
377
- <ogc:PropertyIsEqualTo>
378
- <ogc:PropertyName>dc:type</ogc:PropertyName>
379
- <ogc:Literal>dataset</ogc:Literal>
380
- </ogc:PropertyIsEqualTo>
381
- <ogc:PropertyIsEqualTo>
382
- <ogc:PropertyName>dc:type</ogc:PropertyName>
383
- <ogc:Literal>service</ogc:Literal>
384
- </ogc:PropertyIsEqualTo>
385
- <ogc:PropertyIsEqualTo>
386
- <ogc:PropertyName>dc:type</ogc:PropertyName>
387
- <ogc:Literal>series</ogc:Literal>
388
- </ogc:PropertyIsEqualTo>
389
- </ogc:Or>
390
- </ogc:Filter>
391
- </csw:Constraint>
392
- </csw:Query>
393
- </csw:GetRecords>"""
394
- headers = {"Content-Type": "application/xml"}
395
-
396
- page_number = 0
397
- start = 1
398
-
399
- response = self.post(
400
- url, data=body.format(start=start, schema=self.ISO_SCHEMA), headers=headers
401
- )
402
- response.raise_for_status()
403
-
404
- tree_before_transform = ET.fromstring(response.content, parser=SAFE_PARSER)
405
- # Disabling CoupledResourceLookUp to prevent failure on xlink:href
406
- # https://github.com/SEMICeu/iso-19139-to-dcat-ap/blob/master/documentation/HowTo.md#parameter-coupledresourcelookup
407
- tree = transform(tree_before_transform, CoupledResourceLookUp="'disabled'")
408
-
409
- while tree:
410
- # We query the tree before the transformation because the XSLT remove the search results
411
- # infos (useful for pagination)
412
- search_results = tree_before_transform.find("csw:SearchResults", {"csw": CSW_NAMESPACE})
413
- if search_results is None:
414
- log.error(f"No search results found for {url} on page {page_number}")
415
- break
416
-
417
- subgraph = Graph(namespace_manager=namespace_manager)
418
- subgraph.parse(ET.tostring(tree), format=fmt)
419
-
420
- if not subgraph.subjects(RDF.type, DCAT.Dataset):
421
- raise ValueError("Failed to fetch CSW content")
409
+ CSW_OUTPUT_SCHEMA = "http://www.isotc211.org/2005/gmd"
422
410
 
423
- yield page_number, subgraph
424
- if self.has_reached_max_items():
425
- return
426
-
427
- next_record = self.next_record_if_should_continue(start, search_results)
428
- if not next_record:
429
- break
430
-
431
- start = next_record
432
- page_number += 1
433
-
434
- response = self.post(
435
- url, data=body.format(start=start, schema=self.ISO_SCHEMA), headers=headers
436
- )
437
- response.raise_for_status()
411
+ def __init__(self, *args, **kwargs):
412
+ super().__init__(*args, **kwargs)
413
+ xslt_url = current_app.config["HARVEST_ISO19139_XSLT_URL"]
414
+ xslt = ET.fromstring(self.get(xslt_url).content, parser=self.xml_parser)
415
+ self.transform = ET.XSLT(xslt)
438
416
 
439
- tree_before_transform = ET.fromstring(response.content, parser=SAFE_PARSER)
440
- tree = transform(tree_before_transform, CoupledResourceLookUp="'disabled'")
417
+ @override
418
+ def as_dcat(self, tree: ET._Element) -> ET._Element:
419
+ return self.transform(tree, CoupledResourceLookUp="'disabled'")
udata/harvest/tests/test_dcat_backend.py CHANGED
@@ -899,7 +899,7 @@ class CswIso19139DcatBackendTest:
899
899
  with open(os.path.join(CSW_DCAT_FILES_DIR, "XSLT.xml"), "r") as f:
900
900
  xslt = f.read()
901
901
  url = mock_csw_pagination(rmock, "geonetwork/srv/eng/csw.rdf", "geonetwork-iso-page-{}.xml")
902
- rmock.get(current_app.config.get("HARVEST_ISO19139_XSL_URL"), text=xslt)
902
+ rmock.get(current_app.config.get("HARVEST_ISO19139_XSLT_URL"), text=xslt)
903
903
  org = OrganizationFactory()
904
904
  source = HarvestSourceFactory(
905
905
  backend="csw-iso-19139",
udata/settings.py CHANGED
@@ -283,7 +283,7 @@ class Defaults(object):
283
283
  HARVEST_GRAPHS_S3_BUCKET = None # If the catalog is bigger than `HARVEST_MAX_CATALOG_SIZE_IN_MONGO` store the graph inside S3 instead of MongoDB
284
284
  HARVEST_GRAPHS_S3_FILENAME_PREFIX = "" # Useful to store the graphs inside a subfolder of the bucket. For example by setting `HARVEST_GRAPHS_S3_FILENAME_PREFIX = 'graphs/'`
285
285
 
286
- HARVEST_ISO19139_XSL_URL = "https://raw.githubusercontent.com/SEMICeu/iso-19139-to-dcat-ap/refs/heads/geodcat-ap-2.0.0/iso-19139-to-dcat-ap.xsl"
286
+ HARVEST_ISO19139_XSLT_URL = "https://raw.githubusercontent.com/SEMICeu/iso-19139-to-dcat-ap/refs/heads/geodcat-ap-2.0.0/iso-19139-to-dcat-ap.xsl"
287
287
 
288
288
  # S3 connection details
289
289
  S3_URL = None