PyPI - udata - Versions diffs - 9.1.4.dev31191__py2.py3-none-any.whl → 9.1.4.dev31242__py2.py3-none-any.whl - Mend

udata 9.1.4.dev31191__py2.py3-none-any.whl → 9.1.4.dev31242__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of udata might be problematic. Click here for more details.

Files changed (28) hide show

udata/core/dataservices/api.py CHANGED Viewed

@@ -1,18 +1,23 @@
 from datetime import datetime
 import mongoengine
-from flask import request
+from flask import make_response, redirect, request, url_for
 from flask_login import current_user
 from udata.api import API, api
 from udata.api_fields import patch
 from udata.core.dataset.permissions import OwnablePermission
 from udata.core.followers.api import FollowAPI
+from udata.rdf import RDF_EXTENSIONS, graph_response, negociate_content
 from .models import Dataservice
+from .permissions import DataserviceEditPermission
+from .rdf import dataservice_to_rdf
 ns = api.namespace("dataservices", "Dataservices related operations (beta)")
+common_doc = {"params": {"dataservice": "The dataservice ID or slug"}}
 @ns.route("/", endpoint="dataservices")
 class DataservicesAPI(API):
@@ -87,6 +92,37 @@ class DataserviceAPI(API):
         return "", 204
+@ns.route("/<dataservice:dataservice>/rdf", endpoint="dataservice_rdf", doc=common_doc)
+@api.response(404, "Dataservice not found")
+@api.response(410, "Dataservice has been deleted")
+class DataserviceRdfAPI(API):
+    @api.doc("rdf_dataservice")
+    def get(self, dataservice):
+        format = RDF_EXTENSIONS[negociate_content()]
+        url = url_for("api.dataservice_rdf_format", dataservice=dataservice.id, format=format)
+        return redirect(url)
+@ns.route(
+    "/<dataservice:dataservice>/rdf.<format>", endpoint="dataservice_rdf_format", doc=common_doc
+)
+@api.response(404, "Dataservice not found")
+@api.response(410, "Dataservice has been deleted")
+class DataserviceRdfFormatAPI(API):
+    @api.doc("rdf_dataservice_format")
+    def get(self, dataservice, format):
+        if not DataserviceEditPermission(dataservice).can():
+            if dataservice.private:
+                api.abort(404)
+            elif dataservice.deleted_at:
+                api.abort(410)
+        resource = dataservice_to_rdf(dataservice)
+        # bypass flask-restplus make_response, since graph_response
+        # is handling the content negociation directly
+        return make_response(*graph_response(resource, format))
 @ns.route("/<id>/followers/", endpoint="dataservice_followers")
 @ns.doc(
     get={"id": "list_dataservice_followers"},

udata/core/dataservices/models.py CHANGED Viewed

@@ -1,5 +1,7 @@
 from datetime import datetime
+from mongoengine import Q
 import udata.core.contact_point.api_fields as contact_api_fields
 import udata.core.dataset.api_fields as datasets_api_fields
 from udata.api_fields import field, function_field, generate_fields
@@ -29,6 +31,42 @@ class DataserviceQuerySet(OwnedQuerySet):
     def hidden(self):
         return self(db.Q(private=True) | db.Q(deleted_at__ne=None) | db.Q(archived_at__ne=None))
+    def filter_by_dataset_pagination(self, datasets: list[Dataset], page: int):
+        """Paginate the dataservices on the datasets provided.
+        This is a workaround, used (at least) in the catalogs for sites and organizations.
+        We paginate those kinda weirdly, on their datasets. So a given organization or site
+        catalog will only list a `page_size` number of datasets, but we'd still want to display
+        the site's or org's dataservices.
+        We can't "double paginate", so instead:
+        - only if it's the first page, list all the dataservices that serve no dataset
+        - list all the dataservices that serve the datasets in this page
+        """
+        # We need to add Dataservice to the catalog.
+        # In the best world, we want:
+        # - Keep the correct number of datasets on the page (if the requested page size is 100, we should have 100 datasets)
+        # - Have simple MongoDB queries
+        # - Do not duplicate the datasets (each dataset is present once in the catalog)
+        # - Do not duplicate the dataservices (each dataservice is present once in the catalog)
+        # - Every referenced dataset for one dataservices present on the page (hard to do)
+        #
+        # Multiple solutions are possible but none check all the constraints.
+        # The selected one is to put all the dataservices referencing at least one of the dataset on
+        # the page at the end of it. It means dataservices could be duplicated (present on multiple pages)
+        # and these dataservices may referenced some datasets not present in the current page. It's working
+        # if somebody is doing the same thing as us (keeping the list of all the datasets IDs for the entire catalog then
+        # listing all dataservices in a second pass)
+        # Another option is to do some tricky Mongo requests to order/group datasets by their presence in some dataservices but
+        # it could be really hard to do with a n..n relation.
+        # Let's keep this solution simple right now and iterate on it in the future.
+        dataservices_filter = Q(datasets__in=[d.id for d in datasets])
+        # On the first page, add all dataservices without datasets
+        if page == 1:
+            dataservices_filter = dataservices_filter | Q(datasets__size=0)
+        return self(dataservices_filter)
 @generate_fields()
 class HarvestMetadata(db.EmbeddedDocument):

udata/core/dataset/events.py CHANGED Viewed

@@ -49,7 +49,10 @@ def publish(url, document, resource_id, action):
         "dataset_id": str(document.id),
         "document": resource,
     }
-    r = requests.post(url, json=payload)
+    headers = {}
+    if current_app.config["RESOURCES_ANALYSER_API_KEY"]:
+        headers = {"Authorization": f"Bearer {current_app.config['RESOURCES_ANALYSER_API_KEY']}"}
+    r = requests.post(url, json=payload, headers=headers)
     r.raise_for_status()

udata/core/organization/api.py CHANGED Viewed

@@ -10,6 +10,7 @@ from udata.core.badges import api as badges_api
 from udata.core.badges.fields import badge_fields
 from udata.core.contact_point.api import ContactPointApiParser
 from udata.core.contact_point.api_fields import contact_point_page_fields
+from udata.core.dataservices.models import Dataservice
 from udata.core.dataset.api import DatasetApiParser
 from udata.core.dataset.api_fields import dataset_page_fields
 from udata.core.dataset.models import Dataset
@@ -175,7 +176,12 @@ class OrganizationRdfFormatAPI(API):
         page = int(params.get("page", 1))
         page_size = int(params.get("page_size", 100))
         datasets = Dataset.objects(organization=org).visible().paginate(page, page_size)
-        catalog = build_org_catalog(org, datasets, format=format)
+        dataservices = (
+            Dataservice.objects(organization=org)
+            .visible()
+            .filter_by_dataset_pagination(datasets, page)
+        )
+        catalog = build_org_catalog(org, datasets, dataservices, format=format)
         # bypass flask-restplus make_response, since graph_response
         # is handling the content negociation directly
         return make_response(*graph_response(catalog, format))

udata/core/organization/rdf.py CHANGED Viewed

@@ -7,6 +7,7 @@ from flask import url_for
 from rdflib import BNode, Graph, Literal, URIRef
 from rdflib.namespace import FOAF, RDF, RDFS
+from udata.core.dataservices.rdf import dataservice_to_rdf
 from udata.core.dataset.rdf import dataset_to_rdf
 from udata.rdf import DCAT, DCT, namespace_manager, paginate_catalog
 from udata.uris import endpoint_for
@@ -35,7 +36,7 @@ def organization_to_rdf(org, graph=None):
     return o
-def build_org_catalog(org, datasets, format=None):
+def build_org_catalog(org, datasets, dataservices, format=None):
     graph = Graph(namespace_manager=namespace_manager)
     org_catalog_url = url_for("api.organization_rdf", org=org.id, _external=True)
@@ -47,6 +48,8 @@ def build_org_catalog(org, datasets, format=None):
     for dataset in datasets:
         catalog.add(DCAT.dataset, dataset_to_rdf(dataset, graph))
+    for dataservice in dataservices:
+        catalog.add(DCAT.dataservice, dataservice_to_rdf(dataservice, graph))
     values = {"org": org.id}

udata/core/site/api.py CHANGED Viewed

@@ -1,6 +1,5 @@
 from bson import ObjectId
 from flask import json, make_response, redirect, request, url_for
-from mongoengine import Q
 from udata.api import API, api, fields
 from udata.auth import admin_permission
@@ -107,31 +106,7 @@ class SiteRdfCatalogFormat(API):
         if "tag" in params:
             datasets = datasets.filter(tags=params.get("tag", ""))
         datasets = datasets.paginate(page, page_size)
-        # We need to add Dataservice to the catalog.
-        # In the best world, we want:
-        # - Keep the correct number of datasets on the page (if the requested page size is 100, we should have 100 datasets)
-        # - Have simple MongoDB queries
-        # - Do not duplicate the datasets (each dataset is present once in the catalog)
-        # - Do not duplicate the dataservices (each dataservice is present once in the catalog)
-        # - Every referenced dataset for one dataservices present on the page (hard to do)
-        #
-        # Multiple solutions are possible but none check all the constraints.
-        # The selected one is to put all the dataservices referencing at least one of the dataset on
-        # the page at the end of it. It means dataservices could be duplicated (present on multiple pages)
-        # and these dataservices may referenced some datasets not present in the current page. It's working
-        # if somebody is doing the same thing as us (keeping the list of all the datasets IDs for the entire catalog then
-        # listing all dataservices in a second pass)
-        # Another option is to do some tricky Mongo requests to order/group datasets by their presence in some dataservices but
-        # it could be really hard to do with a n..n relation.
-        # Let's keep this solution simple right now and iterate on it in the future.
-        dataservices_filter = Q(datasets__in=[d.id for d in datasets])
-        # On the first page, add all dataservices without datasets
-        if page == 1:
-            dataservices_filter = dataservices_filter | Q(datasets__size=0)
-        dataservices = Dataservice.objects.visible().filter(dataservices_filter)
+        dataservices = Dataservice.objects.visible().filter_by_dataset_pagination(datasets, page)
         catalog = build_catalog(current_site, datasets, dataservices=dataservices, format=format)
         # bypass flask-restplus make_response, since graph_response

udata/settings.py CHANGED Viewed

@@ -522,6 +522,7 @@ class Defaults(object):
     FIXTURE_DATASET_SLUGS = []
     PUBLISH_ON_RESOURCE_EVENTS = False
     RESOURCES_ANALYSER_URI = "http://localhost:8000"
+    RESOURCES_ANALYSER_API_KEY = None
     # Datasets quality settings
     ###########################################################################

udata 9.1.4.dev31191__py2.py3-none-any.whl → 9.1.4.dev31242__py2.py3-none-any.whl

Potentially problematic release.

udata 9.1.4.dev31191__py2.py3-none-any.whl → 9.1.4.dev31242__py2.py3-none-any.whl