udata 13.0.0__py3-none-any.whl → 13.0.1.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of udata might be problematic. Click here for more details.

@@ -714,6 +714,48 @@ class DcatBackendTest(PytestOnlyDBTestCase):
714
714
  ) # noqa
715
715
  assert dataset.harvest.last_update.date() == date.today()
716
716
 
717
+ def test_datara_extended_roles_foaf(self, rmock):
718
+ # Converted manually from ISO-19139 using SEMICeu XSLT (tag geodcat-ap-2.0.0)
719
+ url = mock_dcat(rmock, "datara--5a26b0f6-0ccf-46ad-ac58-734054b91977.rdf.xml")
720
+ org = OrganizationFactory()
721
+ source = HarvestSourceFactory(backend="dcat", url=url, organization=org)
722
+ actions.run(source)
723
+ dataset = Dataset.objects.filter(organization=org).first()
724
+
725
+ assert dataset is not None
726
+ assert len(dataset.contact_points) == 2
727
+
728
+ assert dataset.contact_points[0].name == "IGN"
729
+ assert dataset.contact_points[0].email == "sav.bd@ign.fr"
730
+ assert dataset.contact_points[0].role == "rightsHolder"
731
+
732
+ assert dataset.contact_points[1].name == "Administrateur de Données"
733
+ assert dataset.contact_points[1].email == "sig.dreal-ara@developpement-durable.gouv.fr"
734
+ assert dataset.contact_points[1].role == "user"
735
+
736
+ def test_datara_extended_roles_vcard(self, rmock):
737
+ # Converted manually from ISO-19139 using SEMICeu XSLT (tag geodcat-ap-2.0.0)
738
+ url = mock_dcat(rmock, "datara--f40c3860-7236-4b30-a141-23b8ae33f7b2.rdf.xml")
739
+ org = OrganizationFactory()
740
+ source = HarvestSourceFactory(backend="dcat", url=url, organization=org)
741
+ actions.run(source)
742
+ dataset = Dataset.objects.filter(organization=org).first()
743
+
744
+ assert dataset is not None
745
+ assert len(dataset.contact_points) == 3
746
+
747
+ assert dataset.contact_points[0].name == "Administrateur de Données"
748
+ assert dataset.contact_points[0].email == "sig.dreal-ara@developpement-durable.gouv.fr"
749
+ assert dataset.contact_points[0].role == "contact"
750
+
751
+ assert dataset.contact_points[1].name == "Jean-Michel GENIS"
752
+ assert dataset.contact_points[1].email == "jm.genis@cbn-alpin.fr"
753
+ assert dataset.contact_points[1].role == "rightsHolder"
754
+
755
+ assert dataset.contact_points[2].name == "Conservatoire Botanique National Massif Central"
756
+ assert dataset.contact_points[2].email == "Benoit.Renaux@cbnmc.fr"
757
+ assert dataset.contact_points[2].role == "rightsHolder"
758
+
717
759
  def test_udata_xml_catalog(self, rmock):
718
760
  LicenseFactory(id="fr-lo", title="Licence ouverte / Open Licence")
719
761
  url = mock_dcat(rmock, "udata.xml")
@@ -0,0 +1,28 @@
1
+ """
2
+ This migration empties harvest.modified_at field in the case of CKAN datasets.
3
+ Indeed, the value that was stored in this field was the *metadata* modification date
4
+ and not the *data* one, contrary to other backends.
5
+ """
6
+
7
+ import logging
8
+
9
+ import click
10
+
11
+ from udata.core.dataset.models import Dataset
12
+
13
+ log = logging.getLogger(__name__)
14
+
15
+
16
+ def migrate(db):
17
+ datasets = Dataset.objects(harvest__backend="CKAN", harvest__modified_at__exists=True)
18
+ count = datasets.count()
19
+
20
+ with click.progressbar(datasets, length=count) as datasets:
21
+ for dataset in datasets:
22
+ dataset.harvest.modified_at = None
23
+ try:
24
+ dataset.save()
25
+ except Exception as err:
26
+ log.error(f"Cannot save dataset {dataset.id} {err}")
27
+ log.info(f"Updated {count} datasets")
28
+ log.info("Done")
udata/rdf.py CHANGED
@@ -136,15 +136,21 @@ INSPIRE_GEMET_SCHEME_URIS = [
136
136
 
137
137
  AGENT_ROLE_TO_RDF_PREDICATE = {
138
138
  "contact": DCAT.contactPoint,
139
- "publisher": DCT.publisher,
140
139
  "creator": DCT.creator,
140
+ "publisher": DCT.publisher,
141
+ "rightsHolder": DCT.rightsHolder,
142
+ "custodian": GEODCAT.custodian,
143
+ "distributor": GEODCAT.distributor,
144
+ "originator": GEODCAT.originator,
145
+ "principalInvestigator": GEODCAT.principalInvestigator,
146
+ "processor": GEODCAT.processor,
147
+ "resourceProvider": GEODCAT.resourceProvider,
148
+ "user": GEODCAT.user,
141
149
  }
142
150
 
143
151
  # Map rdf contact point entity to role
144
152
  CONTACT_POINT_ENTITY_TO_ROLE = {
145
- DCAT.contactPoint: "contact",
146
- DCT.publisher: "publisher",
147
- DCT.creator: "creator",
153
+ predicate: role for role, predicate in AGENT_ROLE_TO_RDF_PREDICATE.items()
148
154
  }
149
155
 
150
156
 
@@ -353,6 +359,8 @@ def themes_from_rdf(rdf):
353
359
 
354
360
 
355
361
  def contact_points_from_rdf(rdf, prop, role, dataset):
362
+ if not dataset.organization and not dataset.owner:
363
+ return
356
364
  for contact_point in rdf.objects(prop):
357
365
  # Read contact point information
358
366
  if isinstance(contact_point, Literal):
@@ -365,7 +373,7 @@ def contact_points_from_rdf(rdf, prop, role, dataset):
365
373
  email = (
366
374
  rdf_value(contact_point, VCARD.hasEmail)
367
375
  or rdf_value(contact_point, VCARD.email)
368
- or rdf_value(contact_point, DCAT.email)
376
+ or None
369
377
  )
370
378
  email = email.replace("mailto:", "").strip() if email else None
371
379
  contact_form = rdf_value(contact_point, VCARD.hasUrl)
@@ -384,8 +392,6 @@ def contact_points_from_rdf(rdf, prop, role, dataset):
384
392
  # continue
385
393
 
386
394
  # Create or get contact point object
387
- if not dataset.organization and not dataset.owner:
388
- continue
389
395
  org_or_owner = {}
390
396
  if dataset.organization:
391
397
  org_or_owner = {"organization": dataset.organization}
@@ -420,14 +426,25 @@ def contact_points_to_rdf(contacts, graph=None):
420
426
  id = BNode()
421
427
 
422
428
  node = graph.resource(id)
423
- node.set(RDF.type, VCARD.Kind)
424
- if contact.name:
425
- node.set(VCARD.fn, Literal(contact.name))
426
- if contact.email:
427
- node.set(VCARD.hasEmail, URIRef(f"mailto:{contact.email}"))
428
- if contact.contact_form:
429
- node.set(VCARD.hasUrl, URIRef(contact.contact_form))
430
- yield node, AGENT_ROLE_TO_RDF_PREDICATE.get(contact.role, DCAT.contactPoint)
429
+ role = AGENT_ROLE_TO_RDF_PREDICATE.get(contact.role, DCAT.contactPoint)
430
+ # GeoDCAT-AP spec: Only contactPoint is a VCARD.Kind (like in DCAT). Other roles are FOAF.Agent.
431
+ if role == DCAT.contactPoint:
432
+ node.set(RDF.type, VCARD.Kind)
433
+ if contact.name:
434
+ node.set(VCARD.fn, Literal(contact.name))
435
+ if contact.email:
436
+ node.set(VCARD.hasEmail, URIRef(f"mailto:{contact.email}"))
437
+ if contact.contact_form:
438
+ node.set(VCARD.hasUrl, URIRef(contact.contact_form))
439
+ else:
440
+ node.set(RDF.type, FOAF.Agent)
441
+ node.set(FOAF.name, Literal(contact.name))
442
+ if contact.email:
443
+ node.set(FOAF.mbox, URIRef(f"mailto:{contact.email}"))
444
+ if contact.contact_form:
445
+ node.set(FOAF.page, URIRef(contact.contact_form))
446
+
447
+ yield node, role
431
448
 
432
449
 
433
450
  def primary_topic_identifier_from_rdf(graph: Graph, resource: RdfResource):
@@ -5,7 +5,7 @@ import pytest
5
5
  import requests
6
6
  from flask import url_for
7
7
  from rdflib import BNode, Graph, Literal, Namespace, URIRef
8
- from rdflib.namespace import FOAF, RDF, RDFS
8
+ from rdflib.namespace import FOAF, ORG, RDF, RDFS
9
9
  from rdflib.resource import Resource as RdfResource
10
10
 
11
11
  from udata.core.contact_point.factories import ContactPointFactory
@@ -166,8 +166,8 @@ class DatasetToRdfTest(PytestOnlyAPITestCase):
166
166
  d = dataset_to_rdf(dataset)
167
167
 
168
168
  contact_rdf = d.value(DCT.publisher)
169
- assert contact_rdf.value(RDF.type).identifier == VCARD.Kind
170
- assert contact_rdf.value(VCARD.fn) == Literal("Publisher Contact")
169
+ assert contact_rdf.value(RDF.type).identifier == FOAF.Agent
170
+ assert contact_rdf.value(FOAF.name) == Literal("Publisher Contact")
171
171
 
172
172
  org_rdf = d.value(GEODCAT.distributor)
173
173
  assert org_rdf.value(RDF.type).identifier == FOAF.Organization
@@ -514,6 +514,194 @@ class RdfToDatasetTest(PytestOnlyDBTestCase):
514
514
  assert isinstance(dataset, Dataset)
515
515
  assert dataset.harvest.modified_at is None
516
516
 
517
+ def test_contact_point_individual_vcard(self):
518
+ g = Graph()
519
+ node = URIRef("https://test.org/dataset")
520
+ g.set((node, RDF.type, DCAT.Dataset))
521
+ g.set((node, DCT.identifier, Literal(faker.uuid4())))
522
+ g.set((node, DCT.title, Literal(faker.sentence())))
523
+
524
+ contact = BNode()
525
+ g.add((contact, RDF.type, VCARD.Individual))
526
+ g.add((contact, VCARD.fn, Literal("foo")))
527
+ g.add((contact, VCARD.email, Literal("foo@example.com")))
528
+ g.add((node, DCAT.contactPoint, contact))
529
+
530
+ # Dataset needs an owner/organization for contact_points_from_rdf() to work
531
+ d = DatasetFactory.build()
532
+ d.organization = OrganizationFactory(name="organization")
533
+
534
+ dataset = dataset_from_rdf(g, d)
535
+ dataset.validate()
536
+
537
+ assert len(dataset.contact_points) == 1
538
+ assert dataset.contact_points[0].role == "contact"
539
+ assert dataset.contact_points[0].name == "foo"
540
+ assert dataset.contact_points[0].email == "foo@example.com"
541
+
542
+ def test_contact_point_individual_foaf(self):
543
+ g = Graph()
544
+ node = URIRef("https://test.org/dataset")
545
+ g.set((node, RDF.type, DCAT.Dataset))
546
+ g.set((node, DCT.identifier, Literal(faker.uuid4())))
547
+ g.set((node, DCT.title, Literal(faker.sentence())))
548
+
549
+ contact = BNode()
550
+ contact_name = Literal("foo")
551
+ contact_email = Literal("foo@example.com")
552
+ g.add((contact, RDF.type, FOAF.Person))
553
+ g.add((contact, FOAF.name, contact_name))
554
+ g.add((contact, FOAF.mbox, contact_email))
555
+ g.add((node, DCT.creator, contact))
556
+
557
+ # Dataset needs an owner/organization for contact_points_from_rdf() to work
558
+ d = DatasetFactory.build()
559
+ d.organization = OrganizationFactory(name="organization")
560
+
561
+ dataset = dataset_from_rdf(g, d)
562
+ dataset.validate()
563
+
564
+ assert len(dataset.contact_points) == 1
565
+ assert dataset.contact_points[0].role == "creator"
566
+ assert dataset.contact_points[0].name == "foo"
567
+ assert dataset.contact_points[0].email == "foo@example.com"
568
+
569
+ def test_contact_point_organization_vcard(self):
570
+ g = Graph()
571
+ node = URIRef("https://test.org/dataset")
572
+ g.set((node, RDF.type, DCAT.Dataset))
573
+ g.set((node, DCT.identifier, Literal(faker.uuid4())))
574
+ g.set((node, DCT.title, Literal(faker.sentence())))
575
+
576
+ contact = BNode()
577
+ g.add((contact, RDF.type, VCARD.Organization))
578
+ g.add((contact, VCARD.fn, Literal("foo")))
579
+ g.add((contact, VCARD.email, Literal("foo@example.com")))
580
+ g.add((node, DCAT.contactPoint, contact))
581
+
582
+ # Dataset needs an owner/organization for contact_points_from_rdf() to work
583
+ d = DatasetFactory.build()
584
+ d.organization = OrganizationFactory(name="organization")
585
+
586
+ dataset = dataset_from_rdf(g, d)
587
+ dataset.validate()
588
+
589
+ assert len(dataset.contact_points) == 1
590
+ assert dataset.contact_points[0].role == "contact"
591
+ assert dataset.contact_points[0].name == "foo"
592
+ assert dataset.contact_points[0].email == "foo@example.com"
593
+
594
+ def test_contact_point_organization_foaf(self):
595
+ g = Graph()
596
+ node = URIRef("https://test.org/dataset")
597
+ g.set((node, RDF.type, DCAT.Dataset))
598
+ g.set((node, DCT.identifier, Literal(faker.uuid4())))
599
+ g.set((node, DCT.title, Literal(faker.sentence())))
600
+
601
+ contact = BNode()
602
+ g.add((contact, RDF.type, FOAF.Organization))
603
+ g.add((contact, FOAF.name, Literal("foo")))
604
+ g.add((contact, FOAF.mbox, Literal("foo@example.com")))
605
+ g.add((node, DCT.creator, contact))
606
+
607
+ # Dataset needs an owner/organization for contact_points_from_rdf() to work
608
+ d = DatasetFactory.build()
609
+ d.organization = OrganizationFactory(name="organization")
610
+
611
+ dataset = dataset_from_rdf(g, d)
612
+ dataset.validate()
613
+
614
+ assert len(dataset.contact_points) == 1
615
+ assert dataset.contact_points[0].role == "creator"
616
+ assert dataset.contact_points[0].name == "foo"
617
+ assert dataset.contact_points[0].email == "foo@example.com"
618
+
619
+ def test_contact_point_organization_member_vcard(self):
620
+ g = Graph()
621
+ node = URIRef("https://test.org/dataset")
622
+ g.set((node, RDF.type, DCAT.Dataset))
623
+ g.set((node, DCT.identifier, Literal(faker.uuid4())))
624
+ g.set((node, DCT.title, Literal(faker.sentence())))
625
+
626
+ contact = BNode()
627
+ g.add((contact, RDF.type, VCARD.Organization))
628
+ g.add((contact, VCARD.fn, Literal("foo")))
629
+ g.add((contact, VCARD["organization-name"], Literal("bar")))
630
+ g.add((contact, VCARD.email, Literal("foo@example.com")))
631
+ g.add((node, DCAT.contactPoint, contact))
632
+
633
+ # Dataset needs an owner/organization for contact_points_from_rdf() to work
634
+ d = DatasetFactory.build()
635
+ d.organization = OrganizationFactory(name="organization")
636
+
637
+ dataset = dataset_from_rdf(g, d)
638
+ dataset.validate()
639
+
640
+ assert len(dataset.contact_points) == 1
641
+ assert dataset.contact_points[0].role == "contact"
642
+ assert dataset.contact_points[0].name == "foo"
643
+ assert dataset.contact_points[0].email == "foo@example.com"
644
+
645
+ def test_contact_point_organization_member_foaf(self):
646
+ g = Graph()
647
+ node = URIRef("https://test.org/dataset")
648
+ g.set((node, RDF.type, DCAT.Dataset))
649
+ g.set((node, DCT.identifier, Literal(faker.uuid4())))
650
+ g.set((node, DCT.title, Literal(faker.sentence())))
651
+
652
+ org = BNode()
653
+ g.add((org, RDF.type, FOAF.Organization))
654
+ g.add((org, FOAF.name, Literal("bar")))
655
+ g.add((org, FOAF.mbox, Literal("bar@example.com")))
656
+ contact = BNode()
657
+ g.add((contact, RDF.type, FOAF.Person))
658
+ g.add((contact, FOAF.name, Literal("foo")))
659
+ g.add((contact, FOAF.mbox, Literal("foo@example.com")))
660
+ g.add((contact, ORG.memberOf, org))
661
+ g.add((node, DCT.creator, contact))
662
+
663
+ # Dataset needs an owner/organization for contact_points_from_rdf() to work
664
+ d = DatasetFactory.build()
665
+ d.organization = OrganizationFactory(name="organization")
666
+
667
+ dataset = dataset_from_rdf(g, d)
668
+ dataset.validate()
669
+
670
+ assert len(dataset.contact_points) == 1
671
+ assert dataset.contact_points[0].role == "creator"
672
+ assert dataset.contact_points[0].name == "foo"
673
+ assert dataset.contact_points[0].email == "foo@example.com"
674
+
675
+ def test_contact_point_organization_member_foaf_no_mail(self):
676
+ g = Graph()
677
+ node = URIRef("https://test.org/dataset")
678
+ g.set((node, RDF.type, DCAT.Dataset))
679
+ g.set((node, DCT.identifier, Literal(faker.uuid4())))
680
+ g.set((node, DCT.title, Literal(faker.sentence())))
681
+
682
+ org = BNode()
683
+ g.add((org, RDF.type, FOAF.Organization))
684
+ g.add((org, FOAF.name, Literal("bar")))
685
+ # no organization email
686
+ contact = BNode()
687
+ g.add((contact, RDF.type, FOAF.Person))
688
+ g.add((contact, FOAF.name, Literal("foo")))
689
+ g.add((contact, FOAF.mbox, Literal("foo@example.com")))
690
+ g.add((contact, ORG.memberOf, org))
691
+ g.add((node, DCT.creator, contact))
692
+
693
+ # Dataset needs an owner/organization for contact_points_from_rdf() to work
694
+ d = DatasetFactory.build()
695
+ d.organization = OrganizationFactory(name="organization")
696
+
697
+ dataset = dataset_from_rdf(g, d)
698
+ dataset.validate()
699
+
700
+ assert len(dataset.contact_points) == 1
701
+ assert dataset.contact_points[0].role == "creator"
702
+ assert dataset.contact_points[0].name == "foo"
703
+ assert dataset.contact_points[0].email == "foo@example.com"
704
+
517
705
  def test_theme_and_tags(self):
518
706
  node = BNode()
519
707
  g = Graph()
udata/tests/test_rdf.py CHANGED
@@ -7,8 +7,9 @@ from rdflib import (
7
7
  from udata.models import ContactPoint
8
8
  from udata.rdf import (
9
9
  ACCEPTED_MIME_TYPES,
10
+ AGENT_ROLE_TO_RDF_PREDICATE,
10
11
  DCAT,
11
- DCT,
12
+ FOAF,
12
13
  FORMAT_MAP,
13
14
  RDF,
14
15
  VCARD,
@@ -103,14 +104,7 @@ class ContactToRdfTest:
103
104
  # Default predicate is "contact"
104
105
  assert predicate == DCAT.contactPoint
105
106
 
106
- @pytest.mark.parametrize(
107
- "role,predicate",
108
- [
109
- ("contact", DCAT.contactPoint),
110
- ("publisher", DCT.publisher),
111
- ("creator", DCT.creator),
112
- ],
113
- )
107
+ @pytest.mark.parametrize("role,predicate", AGENT_ROLE_TO_RDF_PREDICATE.items())
114
108
  def test_contact_points_to_rdf_roles(self, role, predicate):
115
109
  contact = ContactPoint(
116
110
  name="Organization contact",
@@ -122,10 +116,20 @@ class ContactToRdfTest:
122
116
  contact_rdfs = contact_points_to_rdf([contact], None)
123
117
 
124
118
  for contact_point, contact_point_predicate in contact_rdfs:
125
- assert contact_point.value(RDF.type).identifier == VCARD.Kind
126
- assert contact_point.value(VCARD.fn) == Literal("Organization contact")
127
- assert contact_point.value(VCARD.hasEmail).identifier == URIRef("mailto:hello@its.me")
128
- assert contact_point.value(VCARD.hasUrl).identifier == URIRef(
129
- "https://data.support.com"
130
- )
131
119
  assert contact_point_predicate == predicate
120
+ if predicate == DCAT.contactPoint:
121
+ assert contact_point.value(RDF.type).identifier == VCARD.Kind
122
+ assert contact_point.value(VCARD.fn) == Literal("Organization contact")
123
+ assert contact_point.value(VCARD.hasEmail).identifier == URIRef(
124
+ "mailto:hello@its.me"
125
+ )
126
+ assert contact_point.value(VCARD.hasUrl).identifier == URIRef(
127
+ "https://data.support.com"
128
+ )
129
+ else:
130
+ assert contact_point.value(RDF.type).identifier == FOAF.Agent
131
+ assert contact_point.value(FOAF.name) == Literal("Organization contact")
132
+ assert contact_point.value(FOAF.mbox).identifier == URIRef("mailto:hello@its.me")
133
+ assert contact_point.value(FOAF.page).identifier == URIRef(
134
+ "https://data.support.com"
135
+ )