PyPI - udata - Versions diffs - 9.0.1.dev29687__py2.py3-none-any.whl → 9.0.1.dev29716__py2.py3-none-any.whl - Mend

udata 9.0.1.dev29687__py2.py3-none-any.whl → 9.0.1.dev29716__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of udata might be problematic. Click here for more details.

Files changed (29) hide show

udata/core/dataservices/factories.py ADDED Viewed

@@ -0,0 +1,19 @@
+import factory
+from udata.core.dataservices.models import Dataservice
+from udata.core.organization.factories import OrganizationFactory
+from udata.factories import ModelFactory
+class DataserviceFactory(ModelFactory):
+    '''
+    Test factory building `Dataservice` instances filled with Faker-generated values.
+
+    Only `title`, `description` and `base_api_url` are populated by default;
+    an organization is attached only when the `org` trait is enabled.
+    '''
+    class Meta:
+        # Model class instantiated by this factory
+        model = Dataservice
+    # Faker providers: a random sentence, a random text paragraph and a random URL
+    title = factory.Faker('sentence')
+    description = factory.Faker('text')
+    base_api_url = factory.Faker('url')
+    class Params:
+        # Trait: `DataserviceFactory(org=True)` sets `organization`
+        # to an instance produced by `OrganizationFactory`
+        org = factory.Trait(
+            organization=factory.SubFactory(OrganizationFactory),
+        )

udata/core/dataservices/rdf.py CHANGED Viewed

@@ -1,13 +1,14 @@
 from datetime import datetime
 from typing import List, Optional
-from rdflib import RDF, Graph, URIRef
+from rdflib import RDF, BNode, Graph, Literal, URIRef
 from udata.core.dataservices.models import Dataservice, HarvestMetadata as HarvestDataserviceMetadata
 from udata.core.dataset.models import Dataset, License
-from udata.core.dataset.rdf import sanitize_html
+from udata.core.dataset.rdf import dataset_to_graph_id, sanitize_html
 from udata.harvest.models import HarvestSource
-from udata.rdf import DCAT, DCT, contact_point_from_rdf, rdf_value, remote_url_from_rdf, theme_labels_from_rdf, themes_from_rdf, url_from_rdf
+from udata.rdf import DCATAP, TAG_TO_EU_HVD_CATEGORIES, namespace_manager, DCAT, DCT, contact_point_from_rdf, rdf_value, remote_url_from_rdf, themes_from_rdf, url_from_rdf
+from udata.uris import endpoint_for
 def dataservice_from_rdf(graph: Graph, dataservice: Dataservice, node, all_datasets: List[Dataset]) -> Dataservice :
     '''
@@ -55,4 +56,54 @@ def dataservice_from_rdf(graph: Graph, dataservice: Dataservice, node, all_datas
     dataservice.tags = themes_from_rdf(d)
-    return dataservice
+    return dataservice
+def dataservice_to_rdf(dataservice, graph=None):
+    '''
+    Map a dataservice domain model to a DCAT/RDF graph.
+
+    :param dataservice: the dataservice to describe
+    :param graph: optional graph receiving the triples; a new graph bound to
+        `namespace_manager` is created only when no graph is given
+    :returns: the rdflib resource typed `dcat:DataService` describing the dataservice
+    '''
+    # Use the unlocalized permalink to the dataservice as URI when available
+    # unless there is already an upstream URI
+    if dataservice.harvest and dataservice.harvest.rdf_node_id_as_url:
+        node_id = URIRef(dataservice.harvest.rdf_node_id_as_url)
+    elif dataservice.id:
+        node_id = URIRef(endpoint_for('dataservices.show_redirect', 'api.dataservice',
+                         dataservice=dataservice.id, _external=True))
+    else:
+        # Should not happen in production. Some tests only
+        # `build()` a dataservice without saving it to the DB.
+        node_id = BNode()
+    # Expose upstream identifier if present
+    if dataservice.harvest and dataservice.harvest.dct_identifier:
+        identifier = dataservice.harvest.dct_identifier
+    else:
+        identifier = dataservice.id
+    # Compare against `None` explicitly: an empty rdflib `Graph` is falsy
+    # (its length is 0), so `graph or Graph(...)` would silently replace an
+    # empty graph supplied by the caller and write the triples elsewhere.
+    if graph is None:
+        graph = Graph(namespace_manager=namespace_manager)
+    d = graph.resource(node_id)
+    d.set(RDF.type, DCAT.DataService)
+    d.set(DCT.identifier, Literal(identifier))
+    d.set(DCT.title, Literal(dataservice.title))
+    d.set(DCT.description, Literal(dataservice.description))
+    d.set(DCT.issued, Literal(dataservice.created_at))
+    # DCAT defines `endpointURL` and `endpointDescription` as resources (IRIs),
+    # not plain strings, so they are emitted as `URIRef` rather than `Literal`.
+    if dataservice.base_api_url:
+        d.set(DCAT.endpointURL, URIRef(dataservice.base_api_url))
+    if dataservice.endpoint_description_url:
+        d.set(DCAT.endpointDescription, URIRef(dataservice.endpoint_description_url))
+    for tag in dataservice.tags:
+        d.add(DCAT.keyword, Literal(tag))
+    # The `dataset_to_graph_id(dataset)` URIRef may not exist in the current page
+    # but should exist somewhere in the catalog. Maybe we should create a Node
+    # with some basic information about this dataset (but this would return a page
+    # with more datasets than the page size… and could be problematic when processing the
+    # correct Node with all the information in a future page)
+    for dataset in dataservice.datasets:
+        d.add(DCAT.servesDataset, dataset_to_graph_id(dataset))
+    return d

udata/core/dataset/rdf.py CHANGED Viewed

@@ -6,7 +6,7 @@ import json
 import logging
 from datetime import date
-from typing import Optional
+from typing import Optional, Union
 from dateutil.parser import parse as parse_dt
 from flask import current_app
 from geomet import wkt
@@ -149,19 +149,25 @@ def resource_to_rdf(resource, dataset=None, graph=None, is_hvd=False):
     return r
+def dataset_to_graph_id(dataset: Dataset) -> Union[URIRef, BNode]:
+    '''
+    Compute the RDF node identifier of a dataset.
+
+    The upstream (harvested) URI wins when there is one, then the
+    permalink built from the dataset ID; a blank node is the last resort.
+    '''
+    harvest = dataset.harvest
+    if harvest and harvest.uri:
+        return URIRef(harvest.uri)
+    if not dataset.id:
+        # Should not happen in production. Some test only
+        # `build()` a dataset without saving it to the DB.
+        return BNode()
+    permalink = endpoint_for('datasets.show_redirect', 'api.dataset',
+                             dataset=dataset.id, _external=True)
+    return URIRef(permalink)
 def dataset_to_rdf(dataset, graph=None):
     '''
     Map a dataset domain model to a DCAT/RDF graph
     '''
     # Use the unlocalized permalink to the dataset as URI when available
     # unless there is already an upstream URI
-    if dataset.harvest and dataset.harvest.uri:
-        id = URIRef(dataset.harvest.uri)
-    elif dataset.id:
-        id = URIRef(endpoint_for('datasets.show_redirect', 'api.dataset',
-                    dataset=dataset.id, _external=True))
-    else:
-        id = BNode()
+    id = dataset_to_graph_id(dataset)
     # Expose upstream identifier if present
     if dataset.harvest and dataset.harvest.dct_identifier:
         identifier = dataset.harvest.dct_identifier

udata/core/site/api.py CHANGED Viewed

@@ -1,9 +1,11 @@
 from bson import ObjectId
 from flask import request, redirect, url_for, json, make_response
+from mongoengine import Q
 from udata.api import api, API, fields
 from udata.auth import admin_permission
+from udata.core.dataservices.models import Dataservice
 from udata.models import Dataset, Reuse
 from udata.utils import multi_to_dict
 from udata.rdf import (
@@ -109,7 +111,33 @@ class SiteRdfCatalogFormat(API):
         if 'tag' in params:
             datasets = datasets.filter(tags=params.get('tag', ''))
         datasets = datasets.paginate(page, page_size)
-        catalog = build_catalog(current_site, datasets, format=format)
+        # We need to add Dataservice to the catalog.
+        # In the best world, we want:
+        # - Keep the correct number of datasets on the page (if the requested page size is 100, we should have 100 datasets)
+        # - Have simple MongoDB queries
+        # - Do not duplicate the datasets (each dataset is present once in the catalog)
+        # - Do not duplicate the dataservices (each dataservice is present once in the catalog)
+        # - Every dataset referenced by a dataservice is present on the same page (hard to do)
+        #
+        # Multiple solutions are possible but none check all the constraints.
+        # The selected one is to append, at the end of the page, all the dataservices referencing at least one of
+        # the datasets on that page. This means dataservices can be duplicated (present on multiple pages)
+        # and may reference some datasets that are not present in the current page. This works
+        # if consumers do the same thing as us (keep the list of all the dataset IDs for the entire catalog, then
+        # list all dataservices in a second pass).
+        # Another option is to do some tricky Mongo requests to order/group datasets by their presence in some dataservices but
+        # it could be really hard to do with a n..n relation.
+        # Let's keep this solution simple right now and iterate on it in the future.
+        dataservices_filter = Q(datasets__in=[d.id for d in datasets])
+        # On the first page, add all dataservices without datasets
+        if page == 1:
+            dataservices_filter = dataservices_filter | Q(datasets__size=0)
+        dataservices = Dataservice.objects.visible().filter(dataservices_filter)
+        catalog = build_catalog(current_site, datasets, dataservices=dataservices, format=format)
         # bypass flask-restplus make_response, since graph_response
         # is handling the content negotiation directly
         return make_response(*graph_response(catalog, format))

udata/core/site/rdf.py CHANGED Viewed

@@ -5,6 +5,7 @@ from flask import url_for, current_app
 from rdflib import Graph, URIRef, Literal, BNode
 from rdflib.namespace import RDF, FOAF
+from udata.core.dataservices.rdf import dataservice_to_rdf
 from udata.core.dataset.rdf import dataset_to_rdf
 from udata.core.organization.rdf import organization_to_rdf
 from udata.core.user.rdf import user_to_rdf
@@ -13,7 +14,7 @@ from udata.utils import Paginable
 from udata.uris import endpoint_for
-def build_catalog(site, datasets, format=None):
+def build_catalog(site, datasets, dataservices = [], format=None):
     '''Build the DCAT catalog for this site'''
     site_url = endpoint_for('site.home_redirect', 'api.site', _external=True)
     catalog_url = url_for('api.site_rdf_catalog', _external=True)
@@ -40,6 +41,10 @@ def build_catalog(site, datasets, format=None):
             rdf_dataset.add(DCT.publisher, organization_to_rdf(dataset.organization, graph))
         catalog.add(DCAT.dataset, rdf_dataset)
+    for dataservice in dataservices:
+        rdf_dataservice = dataservice_to_rdf(dataservice, graph)
+        catalog.add(DCAT.DataService, rdf_dataservice)
     if isinstance(datasets, Paginable):
         paginate_catalog(catalog, graph, datasets, format, 'api.site_rdf_catalog_format')

udata/routing.py CHANGED Viewed

@@ -217,7 +217,7 @@ def lazy_raise_or_redirect():
             new_args = request.view_args
             new_args[name] = value.arg
             new_url = url_for(request.endpoint, **new_args)
-            return redirect(new_url, code=308)
+            return redirect(new_url, code=204 if request.method == 'OPTIONS' else 308)
 def init_app(app):

udata 9.0.1.dev29687__py2.py3-none-any.whl → 9.0.1.dev29716__py2.py3-none-any.whl

Potentially problematic release.

udata 9.0.1.dev29687__py2.py3-none-any.whl → 9.0.1.dev29716__py2.py3-none-any.whl