udata-8.0.2.dev29304-py2.py3-none-any.whl → udata-9.1.0-py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of udata might be problematic.

Files changed (86)
  1. udata/__init__.py +1 -1
  2. udata/api/__init__.py +2 -0
  3. udata/api/commands.py +0 -2
  4. udata/api_fields.py +41 -3
  5. udata/commands/db.py +88 -48
  6. udata/core/dataservices/factories.py +33 -0
  7. udata/core/dataservices/models.py +42 -4
  8. udata/core/dataservices/rdf.py +106 -0
  9. udata/core/dataset/csv.py +8 -1
  10. udata/core/dataset/models.py +1 -2
  11. udata/core/dataset/rdf.py +37 -128
  12. udata/core/discussions/models.py +20 -0
  13. udata/core/organization/csv.py +5 -3
  14. udata/core/reports/__init__.py +0 -0
  15. udata/core/reports/api.py +44 -0
  16. udata/core/reports/constants.py +30 -0
  17. udata/core/reports/models.py +58 -0
  18. udata/core/reuse/csv.py +3 -0
  19. udata/core/site/api.py +33 -2
  20. udata/core/site/rdf.py +6 -1
  21. udata/core/spam/models.py +6 -0
  22. udata/core/topic/models.py +3 -2
  23. udata/core/topic/parsers.py +3 -2
  24. udata/core/user/apiv2.py +28 -0
  25. udata/db/__init__.py +0 -0
  26. udata/db/tasks.py +6 -0
  27. udata/features/notifications/__init__.py +0 -1
  28. udata/forms/fields.py +2 -2
  29. udata/harvest/api.py +19 -1
  30. udata/harvest/backends/base.py +118 -10
  31. udata/harvest/backends/dcat.py +28 -7
  32. udata/harvest/models.py +6 -0
  33. udata/harvest/tests/dcat/bnodes.xml +13 -2
  34. udata/harvest/tests/test_dcat_backend.py +21 -0
  35. udata/migrations/2024-06-11-fix-reuse-datasets-references.py +35 -0
  36. udata/models/__init__.py +1 -0
  37. udata/rdf.py +113 -2
  38. udata/routing.py +1 -1
  39. udata/settings.py +3 -1
  40. udata/static/admin.js +17 -17
  41. udata/static/admin.js.map +1 -1
  42. udata/static/chunks/{18.ad41fb75ac4226e1f3ce.js → 18.1922fd0b2b7fad122991.js} +3 -3
  43. udata/static/chunks/18.1922fd0b2b7fad122991.js.map +1 -0
  44. udata/static/chunks/{7.11ac4de064ae59691d49.js → 7.e2106342e94ee09393b1.js} +2 -2
  45. udata/static/chunks/7.e2106342e94ee09393b1.js.map +1 -0
  46. udata/static/common.js +1 -1
  47. udata/static/common.js.map +1 -1
  48. udata/storage/s3.py +3 -3
  49. udata/tasks.py +1 -0
  50. udata/tests/api/test_dataservices_api.py +26 -2
  51. udata/tests/api/test_datasets_api.py +1 -1
  52. udata/tests/api/test_reports_api.py +87 -0
  53. udata/tests/apiv2/test_me_api.py +40 -0
  54. udata/tests/dataset/test_dataset_rdf.py +19 -1
  55. udata/tests/frontend/test_auth.py +1 -4
  56. udata/tests/organization/test_csv_adapter.py +0 -1
  57. udata/tests/plugin.py +2 -0
  58. udata/tests/site/test_site_api.py +0 -1
  59. udata/tests/site/test_site_rdf.py +66 -0
  60. udata/tests/test_discussions.py +24 -34
  61. udata/tests/test_model.py +3 -2
  62. udata/tests/test_utils.py +1 -1
  63. udata/translations/ar/LC_MESSAGES/udata.mo +0 -0
  64. udata/translations/ar/LC_MESSAGES/udata.po +128 -64
  65. udata/translations/de/LC_MESSAGES/udata.mo +0 -0
  66. udata/translations/de/LC_MESSAGES/udata.po +128 -64
  67. udata/translations/es/LC_MESSAGES/udata.mo +0 -0
  68. udata/translations/es/LC_MESSAGES/udata.po +128 -64
  69. udata/translations/fr/LC_MESSAGES/udata.mo +0 -0
  70. udata/translations/fr/LC_MESSAGES/udata.po +128 -64
  71. udata/translations/it/LC_MESSAGES/udata.mo +0 -0
  72. udata/translations/it/LC_MESSAGES/udata.po +128 -64
  73. udata/translations/pt/LC_MESSAGES/udata.mo +0 -0
  74. udata/translations/pt/LC_MESSAGES/udata.po +128 -64
  75. udata/translations/sr/LC_MESSAGES/udata.mo +0 -0
  76. udata/translations/sr/LC_MESSAGES/udata.po +128 -64
  77. udata/translations/udata.pot +129 -65
  78. udata/uris.py +14 -13
  79. {udata-8.0.2.dev29304.dist-info → udata-9.1.0.dist-info}/METADATA +26 -7
  80. {udata-8.0.2.dev29304.dist-info → udata-9.1.0.dist-info}/RECORD +84 -72
  81. udata/static/chunks/18.ad41fb75ac4226e1f3ce.js.map +0 -1
  82. udata/static/chunks/7.11ac4de064ae59691d49.js.map +0 -1
  83. {udata-8.0.2.dev29304.dist-info → udata-9.1.0.dist-info}/LICENSE +0 -0
  84. {udata-8.0.2.dev29304.dist-info → udata-9.1.0.dist-info}/WHEEL +0 -0
  85. {udata-8.0.2.dev29304.dist-info → udata-9.1.0.dist-info}/entry_points.txt +0 -0
  86. {udata-8.0.2.dev29304.dist-info → udata-9.1.0.dist-info}/top_level.txt +0 -0
udata/core/dataset/rdf.py CHANGED
@@ -6,8 +6,6 @@ import json
 import logging
 
 from datetime import date
-from html.parser import HTMLParser
-from typing import Optional
 from dateutil.parser import parse as parse_dt
 from flask import current_app
 from geomet import wkt
@@ -18,12 +16,13 @@ from mongoengine.errors import ValidationError
 
 from udata import i18n, uris
 from udata.core.spatial.models import SpatialCoverage
-from udata.frontend.markdown import parse_html
 from udata.core.dataset.models import HarvestDatasetMetadata, HarvestResourceMetadata
-from udata.models import db, ContactPoint
+from udata.harvest.exceptions import HarvestSkipException
+from udata.models import db
 from udata.rdf import (
-    DCAT, DCT, FREQ, SCV, SKOS, SPDX, SCHEMA, EUFREQ, EUFORMAT, IANAFORMAT, VCARD, RDFS,
-    namespace_manager, schema_from_rdf, url_from_rdf
+    DCAT, DCATAP, DCT, FREQ, SCV, SKOS, SPDX, SCHEMA, EUFREQ, EUFORMAT, IANAFORMAT, TAG_TO_EU_HVD_CATEGORIES, RDFS,
+    namespace_manager, rdf_value, remote_url_from_rdf, sanitize_html, schema_from_rdf, themes_from_rdf, url_from_rdf, HVD_LEGISLATION,
+    contact_point_from_rdf,
 )
 from udata.utils import get_by, safe_unicode
 from udata.uris import endpoint_for
@@ -76,43 +75,6 @@ EU_RDF_REQUENCIES = {
     EUFREQ.NEVER: 'punctual',
 }
 
-# Map High Value Datasets URIs to keyword categories
-EU_HVD_CATEGORIES = {
-    "http://data.europa.eu/bna/c_164e0bf5": "Météorologiques",
-    "http://data.europa.eu/bna/c_a9135398": "Entreprises et propriété d'entreprises",
-    "http://data.europa.eu/bna/c_ac64a52d": "Géospatiales",
-    "http://data.europa.eu/bna/c_b79e35eb": "Mobilité",
-    "http://data.europa.eu/bna/c_dd313021": "Observation de la terre et environnement",
-    "http://data.europa.eu/bna/c_e1da4e07": "Statistiques"
-}
-
-
-class HTMLDetector(HTMLParser):
-    def __init__(self, *args, **kwargs):
-        HTMLParser.__init__(self, *args, **kwargs)
-        self.elements = set()
-
-    def handle_starttag(self, tag, attrs):
-        self.elements.add(tag)
-
-    def handle_endtag(self, tag):
-        self.elements.add(tag)
-
-
-def is_html(text):
-    parser = HTMLDetector()
-    parser.feed(text)
-    return bool(parser.elements)
-
-
-def sanitize_html(text):
-    text = text.toPython() if isinstance(text, Literal) else ''
-    if is_html(text):
-        return parse_html(text)
-    else:
-        return text.strip()
-
-
 def temporal_to_rdf(daterange, graph=None):
     if not daterange:
         return
@@ -141,7 +103,7 @@ def owner_to_rdf(dataset, graph=None):
         return
 
 
-def resource_to_rdf(resource, dataset=None, graph=None):
+def resource_to_rdf(resource, dataset=None, graph=None, is_hvd=False):
     '''
     Map a Resource domain model to a DCAT/RDF graph
     '''
@@ -180,22 +142,31 @@ def resource_to_rdf(resource, dataset=None, graph=None, is_hvd=False):
         checksum.add(SPDX.algorithm, getattr(SPDX, algorithm))
         checksum.add(SPDX.checksumValue, Literal(resource.checksum.value))
         r.add(SPDX.checksum, checksum)
+    if is_hvd:
+        # DCAT-AP HVD applicable legislation is also expected at the distribution level
+        r.add(DCATAP.applicableLegislation, URIRef(HVD_LEGISLATION))
     return r
 
 
+def dataset_to_graph_id(dataset: Dataset) -> URIRef | BNode:
+    if dataset.harvest and dataset.harvest.uri:
+        return URIRef(dataset.harvest.uri)
+    elif dataset.id:
+        return URIRef(endpoint_for('datasets.show_redirect', 'api.dataset',
+                                   dataset=dataset.id, _external=True))
+    else:
+        # Should not happen in production. Some tests only
+        # `build()` a dataset without saving it to the DB.
+        return BNode()
+
 def dataset_to_rdf(dataset, graph=None):
     '''
     Map a dataset domain model to a DCAT/RDF graph
     '''
     # Use the unlocalized permalink to the dataset as URI when available
     # unless there is already an upstream URI
-    if dataset.harvest and dataset.harvest.uri:
-        id = URIRef(dataset.harvest.uri)
-    elif dataset.id:
-        id = URIRef(endpoint_for('datasets.show_redirect', 'api.dataset',
-                                 dataset=dataset.id, _external=True))
-    else:
-        id = BNode()
+    id = dataset_to_graph_id(dataset)
+
     # Expose upstream identifier if present
     if dataset.harvest and dataset.harvest.dct_identifier:
         identifier = dataset.harvest.dct_identifier
@@ -214,11 +185,20 @@ def dataset_to_rdf(dataset, graph=None):
     if dataset.acronym:
         d.set(SKOS.altLabel, Literal(dataset.acronym))
 
+    # Add DCAT-AP HVD properties if the dataset is tagged hvd.
+    # See https://semiceu.github.io/DCAT-AP/releases/2.2.0-hvd/
+    is_hvd = current_app.config['HVD_SUPPORT'] and 'hvd' in dataset.tags
+    if is_hvd:
+        d.add(DCATAP.applicableLegislation, URIRef(HVD_LEGISLATION))
+
     for tag in dataset.tags:
         d.add(DCAT.keyword, Literal(tag))
+        # Add HVD category if this dataset is tagged HVD
+        if is_hvd and tag in TAG_TO_EU_HVD_CATEGORIES:
+            d.add(DCATAP.hvdCategory, URIRef(TAG_TO_EU_HVD_CATEGORIES[tag]))
 
     for resource in dataset.resources:
-        d.add(DCAT.distribution, resource_to_rdf(resource, dataset, graph))
+        d.add(DCAT.distribution, resource_to_rdf(resource, dataset, graph, is_hvd))
 
     if dataset.temporal_coverage:
         d.set(DCT.temporal, temporal_to_rdf(dataset.temporal_coverage, graph))
@@ -241,18 +221,6 @@ CHECKSUM_ALGORITHMS = {
 }
 
 
-def serialize_value(value):
-    if isinstance(value, (URIRef, Literal)):
-        return value.toPython()
-    elif isinstance(value, RdfResource):
-        return value.identifier.toPython()
-
-
-def rdf_value(obj, predicate, default=None):
-    value = obj.value(predicate)
-    return serialize_value(value) if value else default
-
-
 def temporal_from_literal(text):
     '''
     Parse a temporal coverage from a literal ie. either:
@@ -327,29 +295,6 @@ def temporal_from_rdf(period_of_time):
         # so we log the error for future investigation and improvement
         log.warning('Unable to parse temporal coverage', exc_info=True)
 
-
-def contact_point_from_rdf(rdf, dataset):
-    contact_point = rdf.value(DCAT.contactPoint)
-    if contact_point:
-        name = rdf_value(contact_point, VCARD.fn) or ''
-        email = (rdf_value(contact_point, VCARD.hasEmail)
-                 or rdf_value(contact_point, VCARD.email)
-                 or rdf_value(contact_point, DCAT.email))
-        if not email:
-            return
-        email = email.replace('mailto:', '').strip()
-        if dataset.organization:
-            contact_point = ContactPoint.objects(
-                name=name, email=email, organization=dataset.organization).first()
-            return (contact_point or
-                    ContactPoint(name=name, email=email, organization=dataset.organization).save())
-        elif dataset.owner:
-            contact_point = ContactPoint.objects(
-                name=name, email=email, owner=dataset.owner).first()
-            return (contact_point or
-                    ContactPoint(name=name, email=email, owner=dataset.owner).save())
-
-
 def spatial_from_rdf(graph):
     geojsons = []
     for term in graph.objects(DCT.spatial):
@@ -489,43 +434,6 @@ def title_from_rdf(rdf, url):
     else:
         return i18n._('Nameless resource')
 
-
-def remote_url_from_rdf(rdf):
-    '''
-    Return DCAT.landingPage if found and uri validation succeeds.
-    Use RDF identifier as fallback if uri validation succeeds.
-    '''
-    landing_page = url_from_rdf(rdf, DCAT.landingPage)
-    uri = rdf.identifier.toPython()
-    for candidate in [landing_page, uri]:
-        if candidate:
-            try:
-                uris.validate(candidate)
-                return candidate
-            except uris.ValidationError:
-                pass
-
-
-def theme_labels_from_rdf(rdf):
-    '''
-    Get theme labels to use as keywords.
-    Map HVD keywords from known URIs resources if HVD support is activated.
-    '''
-    for theme in rdf.objects(DCAT.theme):
-        if isinstance(theme, RdfResource):
-            uri = theme.identifier.toPython()
-            if current_app.config['HVD_SUPPORT'] and uri in EU_HVD_CATEGORIES:
-                label = EU_HVD_CATEGORIES[uri]
-                # Additionnally yield hvd keyword
-                yield 'hvd'
-            else:
-                label = rdf_value(theme, SKOS.prefLabel)
-        else:
-            label = theme.toPython()
-        if label:
-            yield label
-
-
 def resource_from_rdf(graph_or_distrib, dataset=None, is_additionnal=False):
     '''
     Map a Resource domain model to a DCAT/RDF graph
@@ -603,6 +511,9 @@ def dataset_from_rdf(graph: Graph, dataset=None, node=None):
     d = graph.resource(node)
 
     dataset.title = rdf_value(d, DCT.title)
+    if not dataset.title:
+        raise HarvestSkipException("missing title on dataset")
+
     # Support dct:abstract if dct:description is missing (sometimes used instead)
     description = d.value(DCT.description) or d.value(DCT.abstract)
     dataset.description = sanitize_html(description)
@@ -620,9 +531,7 @@ def dataset_from_rdf(graph: Graph, dataset=None, node=None):
     if acronym:
         dataset.acronym = acronym
 
-    tags = [tag.toPython() for tag in d.objects(DCAT.keyword)]
-    tags += theme_labels_from_rdf(d)
-    dataset.tags = list(set(tags))
+    dataset.tags = themes_from_rdf(d)
 
     temporal_coverage = temporal_from_rdf(d.value(DCT.temporal))
     if temporal_coverage:
@@ -675,7 +584,7 @@ def dataset_from_rdf(graph: Graph, dataset=None, node=None):
 
     return dataset
 
-def bbox_to_geojson_multipolygon(bbox_as_str: str) -> Optional[dict]:
+def bbox_to_geojson_multipolygon(bbox_as_str: str) -> dict | None:
     bbox = bbox_as_str.strip().split(',')
     if len(bbox) != 4:
         return None
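
Editor's note: to make the new HVD flow above concrete, here is a minimal standalone rdflib sketch. The DCATAP namespace URI, the HVD_LEGISLATION value and the TAG_TO_EU_HVD_CATEGORIES entry are illustrative stand-ins; the real constants now live in udata.rdf and their exact values are not shown in this diff.

from rdflib import Graph, Literal, Namespace, URIRef

DCAT = Namespace('http://www.w3.org/ns/dcat#')
DCATAP = Namespace('http://data.europa.eu/r5r/')
# Assumed stand-ins for the udata.rdf constants:
HVD_LEGISLATION = 'http://data.europa.eu/eli/reg_impl/2023/138/oj'
TAG_TO_EU_HVD_CATEGORIES = {'hvd-category-mobility': 'http://data.europa.eu/bna/c_b79e35eb'}

g = Graph()
d = URIRef('https://example.org/datasets/demo')
tags = ['hvd', 'hvd-category-mobility']

is_hvd = 'hvd' in tags  # udata additionally checks the HVD_SUPPORT setting
if is_hvd:
    # Applicable legislation goes on the dataset (and on each distribution)
    g.add((d, DCATAP.applicableLegislation, URIRef(HVD_LEGISLATION)))
for tag in tags:
    g.add((d, DCAT.keyword, Literal(tag)))
    if is_hvd and tag in TAG_TO_EU_HVD_CATEGORIES:
        g.add((d, DCATAP.hvdCategory, URIRef(TAG_TO_EU_HVD_CATEGORIES[tag])))
print(g.serialize(format='turtle'))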
udata/core/discussions/models.py CHANGED
@@ -1,6 +1,8 @@
 import logging
 from datetime import datetime
 
+from flask_login import current_user
+
 from udata.mongo import db
 from udata.core.spam.models import SpamMixin, spam_protected
 from .signals import (on_new_discussion, on_discussion_closed, on_new_discussion_comment)
@@ -67,6 +69,24 @@ class Discussion(SpamMixin, db.Document):
     def embeds_to_check_for_spam(self):
         return self.discussion[1:]
 
+    def spam_is_whitelisted(self) -> bool:
+        from udata.core.dataset.permissions import OwnablePermission
+        from udata.core.owned import Owned
+
+        if not current_user or not current_user.is_authenticated:
+            return False
+
+        if not isinstance(self.subject, Owned):
+            return False
+
+        # When creating a new Discussion the `subject` is an empty model
+        # with only `id`. We need to fetch it from the database to have
+        # all the required information
+        if not self.subject.owner or not self.subject.organization:
+            self.subject.reload()
+
+        return OwnablePermission(self.subject).can()
+
     @property
     def external_url(self):
         return self.subject.url_for(
udata/core/organization/csv.py CHANGED
@@ -15,18 +15,20 @@ class OrganizationCsvAdapter(csv.Adapter):
         ('url', 'external_url'),
         'description',
         ('logo', lambda o: o.logo(external=True)),
-        ('badges', lambda o: [badge.kind for badge in o.badges]),
+        ('badges', lambda o: ','.join([badge.kind for badge in o.badges])),
         'created_at',
         'last_modified',
+        'business_number_id',
+        ('members_count', lambda o: len(o.members)),
     )
 
     def dynamic_fields(self):
         return csv.metric_fields(Organization) + self.get_dynamic_field_downloads()
-
+
     def get_dynamic_field_downloads(self):
         downloads_counts = self.get_downloads_counts()
         return [('downloads', lambda o: downloads_counts.get(str(o.id), 0))]
-
+
     def get_downloads_counts(self):
         '''
         Prefetch all the resources' downloads for all selected organization into memory
udata/core/reports/__init__.py ADDED
File without changes
udata/core/reports/api.py ADDED
@@ -0,0 +1,44 @@
+import mongoengine
+from flask import request
+from flask_login import current_user
+
+from udata.api import API, api, fields
+from udata.api_fields import patch
+
+from .constants import reports_reasons_translations
+from .models import Report
+
+ns = api.namespace('reports', 'User reported objects related operations (beta)')
+
+@ns.route('/', endpoint='reports')
+class ReportsAPI(API):
+    @api.doc('list_reports')
+    @api.expect(Report.__index_parser__)
+    @api.marshal_with(Report.__page_fields__)
+    def get(self):
+        query = Report.objects
+
+        return Report.apply_sort_filters_and_pagination(query)
+
+    @api.secure
+    @api.doc('create_report', responses={400: 'Validation error'})
+    @api.expect(Report.__write_fields__)
+    @api.marshal_with(Report.__read_fields__, code=201)
+    def post(self):
+        report = patch(Report(), request)
+        report.by = current_user._get_current_object()
+
+        try:
+            report.save()
+        except mongoengine.errors.ValidationError as e:
+            api.abort(400, e.message)
+
+        return report, 201
+
+
+@ns.route('/reasons/', endpoint='reports_reasons')
+class ReportsReasonsAPI(API):
+    @api.doc('list_reports_reasons')
+    @ns.response(200, "list of available reasons associated with their labels", fields.Raw)
+    def get(self):
+        return reports_reasons_translations()
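
Editor's note: a hypothetical client call, assuming udata's conventional /api/1 prefix and X-API-KEY authentication (neither is shown in this diff; host, key and ObjectId below are placeholders):

import requests

resp = requests.post(
    'https://demo.data.gouv.fr/api/1/reports/',
    headers={'X-API-KEY': 'your-api-key'},  # placeholder credential; POST is @api.secure
    json={
        'object_type': 'Dataset',                  # one of REPORTABLE_MODELS
        'object_id': '5de8f787634f41e4b1b54dad',   # example ObjectId
        'reason': 'spam',                          # one of REPORT_REASONS_CHOICES
        'message': 'Optional free-text context',
    },
)
print(resp.status_code)  # 201 on success, 400 on validation error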
udata/core/reports/constants.py ADDED
@@ -0,0 +1,30 @@
+from udata.core.dataset.models import Dataset
+from udata.i18n import lazy_gettext as _
+
+REASON_PERSONAL_DATA = "personal_data"
+REASON_EXPLICIT_CONTENT = "explicit_content"
+REASON_ILLEGAL_CONTENT = "illegal_content"
+REASON_OTHERS = "others"
+REASON_SECURITY = "security"
+REASON_SPAM = "spam"
+
+
+def reports_reasons_translations() -> list:
+    """
+    This is a function to avoid creating the list with a wrong lang
+    at the start of the app.
+    """
+    return [
+        {"value": REASON_EXPLICIT_CONTENT, "label": _("Explicit content")},
+        {"value": REASON_ILLEGAL_CONTENT, "label": _("Illegal content")},
+        {"value": REASON_OTHERS, "label": _("Others")},
+        {"value": REASON_PERSONAL_DATA, "label": _("Personal data")},
+        {"value": REASON_SECURITY, "label": _("Security")},
+        {"value": REASON_SPAM, "label": _("Spam")},
+    ]
+
+
+REPORT_REASONS_CHOICES: list[str] = [
+    item["value"] for item in reports_reasons_translations()
+]
+REPORTABLE_MODELS = [Dataset]
udata/core/reports/models.py ADDED
@@ -0,0 +1,58 @@
+from datetime import datetime
+
+from mongoengine import NULLIFY, signals
+
+from udata.api_fields import field, generate_fields
+from udata.core.user.api_fields import user_ref_fields
+from udata.core.user.models import User
+from udata.mongo import db
+
+from .constants import REPORT_REASONS_CHOICES, REPORTABLE_MODELS
+
+
+@generate_fields()
+class Report(db.Document):
+    by = field(
+        db.ReferenceField(User, reverse_delete_rule=NULLIFY),
+        nested_fields=user_ref_fields,
+        description="Only set if a user was connected when reporting an element.",
+        readonly=True,
+        allow_null=True,
+    )
+
+    object_type = field(
+        db.StringField(choices=[m.__name__ for m in REPORTABLE_MODELS])
+    )
+    object_id = field(
+        db.ObjectIdField()
+    )
+    object_deleted_at = field(
+        db.DateTimeField(),
+        allow_null=True,
+        readonly=True,
+    )
+
+    reason = field(
+        db.StringField(choices=REPORT_REASONS_CHOICES, required=True),
+    )
+    message = field(
+        db.StringField(),
+    )
+
+    reported_at = field(
+        db.DateTimeField(default=datetime.utcnow, required=True),
+        readonly=True,
+    )
+
+    @classmethod
+    def mark_as_deleted_soft_delete(cls, sender, document, **kwargs):
+        if document.deleted:
+            Report.objects(object_type=sender.__name__, object_id=document.id, object_deleted_at=None).update(object_deleted_at=datetime.utcnow)
+
+    def mark_as_deleted_hard_delete(cls, document, **kwargs):
+        Report.objects(object_type=document.__class__.__name__, object_id=document.id, object_deleted_at=None).update(object_deleted_at=datetime.utcnow)
+
+
+for model in REPORTABLE_MODELS:
+    signals.post_save.connect(Report.mark_as_deleted_soft_delete, sender=model)
+    signals.post_delete.connect(Report.mark_as_deleted_hard_delete, sender=model)
udata/core/reuse/csv.py CHANGED
@@ -15,10 +15,13 @@ class ReuseCsvAdapter(csv.Adapter):
         ('remote_url', 'url'),
         ('organization', 'organization.name'),
         ('organization_id', 'organization.id'),
+        ('owner', 'owner.slug'),  # in case it's owned by a user
+        ('owner_id', 'owner.id'),
         ('image', lambda r: r.image(external=True)),
         ('featured', lambda r: r.featured or False),
         'created_at',
         'last_modified',
+        'topic',
         ('tags', lambda r: ','.join(r.tags)),
         ('datasets', lambda r: ','.join([str(d.id) for d in r.datasets])),
     )
udata/core/site/api.py CHANGED
@@ -1,9 +1,11 @@
 from bson import ObjectId
 
 from flask import request, redirect, url_for, json, make_response
+from mongoengine import Q
 
 from udata.api import api, API, fields
 from udata.auth import admin_permission
+from udata.core.dataservices.models import Dataservice
 from udata.models import Dataset, Reuse
 from udata.utils import multi_to_dict
 from udata.rdf import (
@@ -105,8 +107,37 @@ class SiteRdfCatalogFormat(API):
         params = multi_to_dict(request.args)
         page = int(params.get('page', 1))
         page_size = int(params.get('page_size', 100))
-        datasets = Dataset.objects.visible().paginate(page, page_size)
-        catalog = build_catalog(current_site, datasets, format=format)
+        datasets = Dataset.objects.visible()
+        if 'tag' in params:
+            datasets = datasets.filter(tags=params.get('tag', ''))
+        datasets = datasets.paginate(page, page_size)
+
+        # We need to add Dataservice to the catalog.
+        # In the best world, we want:
+        # - Keep the correct number of datasets on the page (if the requested page size is 100, we should have 100 datasets)
+        # - Have simple MongoDB queries
+        # - Do not duplicate the datasets (each dataset is present once in the catalog)
+        # - Do not duplicate the dataservices (each dataservice is present once in the catalog)
+        # - Every referenced dataset for one dataservices present on the page (hard to do)
+        #
+        # Multiple solutions are possible but none check all the constraints.
+        # The selected one is to put all the dataservices referencing at least one of the dataset on
+        # the page at the end of it. It means dataservices could be duplicated (present on multiple pages)
+        # and these dataservices may referenced some datasets not present in the current page. It's working
+        # if somebody is doing the same thing as us (keeping the list of all the datasets IDs for the entire catalog then
+        # listing all dataservices in a second pass)
+        # Another option is to do some tricky Mongo requests to order/group datasets by their presence in some dataservices but
+        # it could be really hard to do with a n..n relation.
+        # Let's keep this solution simple right now and iterate on it in the future.
+        dataservices_filter = Q(datasets__in=[d.id for d in datasets])
+
+        # On the first page, add all dataservices without datasets
+        if page == 1:
+            dataservices_filter = dataservices_filter | Q(datasets__size=0)
+
+        dataservices = Dataservice.objects.visible().filter(dataservices_filter)
+
+        catalog = build_catalog(current_site, datasets, dataservices=dataservices, format=format)
         # bypass flask-restplus make_response, since graph_response
         # is handling the content negociation directly
         return make_response(*graph_response(catalog, format))
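
Editor's note: a condensed sketch of the page-assembly strategy described in the comment above, using plain mongoengine with simplified stand-in models instead of udata's paginate helper and visible() querysets:

from mongoengine import Document, ListField, Q, ReferenceField, StringField, connect

connect('example')  # illustrative connection

class Dataset(Document):
    title = StringField()

class Dataservice(Document):
    title = StringField()
    datasets = ListField(ReferenceField(Dataset))

def catalog_page(page: int, page_size: int):
    datasets = list(Dataset.objects.skip((page - 1) * page_size).limit(page_size))
    # Append every dataservice referencing at least one dataset of this page...
    ds_filter = Q(datasets__in=[d.id for d in datasets])
    if page == 1:
        # ...and, on the first page only, dataservices with no datasets at all
        ds_filter |= Q(datasets__size=0)
    return datasets, list(Dataservice.objects(ds_filter))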
udata/core/site/rdf.py CHANGED
@@ -5,6 +5,7 @@ from flask import url_for, current_app
 from rdflib import Graph, URIRef, Literal, BNode
 from rdflib.namespace import RDF, FOAF
 
+from udata.core.dataservices.rdf import dataservice_to_rdf
 from udata.core.dataset.rdf import dataset_to_rdf
 from udata.core.organization.rdf import organization_to_rdf
 from udata.core.user.rdf import user_to_rdf
@@ -13,7 +14,7 @@ from udata.utils import Paginable
 from udata.uris import endpoint_for
 
 
-def build_catalog(site, datasets, format=None):
+def build_catalog(site, datasets, dataservices=[], format=None):
     '''Build the DCAT catalog for this site'''
     site_url = endpoint_for('site.home_redirect', 'api.site', _external=True)
     catalog_url = url_for('api.site_rdf_catalog', _external=True)
@@ -40,6 +41,10 @@ def build_catalog(site, datasets, dataservices=[], format=None):
         rdf_dataset.add(DCT.publisher, organization_to_rdf(dataset.organization, graph))
         catalog.add(DCAT.dataset, rdf_dataset)
 
+    for dataservice in dataservices:
+        rdf_dataservice = dataservice_to_rdf(dataservice, graph)
+        catalog.add(DCAT.DataService, rdf_dataservice)
+
     if isinstance(datasets, Paginable):
         paginate_catalog(catalog, graph, datasets, format, 'api.site_rdf_catalog_format')
udata/core/spam/models.py CHANGED
@@ -67,6 +67,9 @@ class SpamMixin(object):
         if not self.spam:
             self.spam = SpamInfo(status=NOT_CHECKED, callbacks={})
 
+        if self.spam_is_whitelisted():
+            return
+
         # The breadcrumb is useful during reporting to know where we came from
         # in case of a potential spam inside an embed.
         if breadcrumb is None:
@@ -139,6 +142,9 @@ class SpamMixin(object):
     def embeds_to_check_for_spam(self):
         return []
 
+    def spam_is_whitelisted(self) -> bool:
+        return False
+
     def spam_report_message(self):
         return f"Spam potentiel sur {type(self).__name__}"
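Editor's note: the hook pattern in isolation, as a stripped-down sketch (names are illustrative, not udata code). SpamMixin now consults spam_is_whitelisted() before running any check, and Discussion overrides it so content authored by someone allowed to administer the subject is never flagged:

class SpamMixinSketch:
    def spam_is_whitelisted(self) -> bool:
        return False  # default: nothing is whitelisted

    def check_for_spam(self, text: str) -> bool:
        if self.spam_is_whitelisted():
            return False  # trusted author: skip detection entirely
        return 'free money' in text.lower()  # placeholder heuristic


class DiscussionSketch(SpamMixinSketch):
    def __init__(self, author_can_edit_subject: bool):
        self.author_can_edit_subject = author_can_edit_subject

    def spam_is_whitelisted(self) -> bool:
        # Stand-in for the real OwnablePermission(self.subject).can() check
        return self.author_can_edit_subject


assert DiscussionSketch(True).check_for_spam('free money!!!') is False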
udata/core/topic/models.py CHANGED
@@ -3,7 +3,7 @@ from mongoengine.signals import pre_save
 from udata.models import db, SpatialCoverage
 from udata.search import reindex
 from udata.tasks import as_task_param
-from udata.core.owned import Owned
+from udata.core.owned import Owned, OwnedQuerySet
 
 
 __all__ = ('Topic', )
@@ -36,7 +36,8 @@ class Topic(db.Document, Owned, db.Datetimed):
             'slug'
         ] + Owned.meta['indexes'],
         'ordering': ['-created_at'],
-        'auto_create_index_on_save': True
+        'auto_create_index_on_save': True,
+        'queryset_class': OwnedQuerySet,
     }
 
     def __str__(self):
udata/core/topic/parsers.py CHANGED
@@ -11,10 +11,11 @@ class TopicApiParser(ModelApiParser):
         'last_modified': 'last_modified',
     }
 
-    def __init__(self):
+    def __init__(self, with_include_private=True):
         super().__init__()
+        if with_include_private:
+            self.parser.add_argument('include_private', type=bool, location='args')
         self.parser.add_argument('tag', type=str, location='args')
-        self.parser.add_argument('include_private', type=bool, location='args')
         self.parser.add_argument('geozone', type=str, location='args')
         self.parser.add_argument('granularity', type=str, location='args')
         self.parser.add_argument('organization', type=str, location='args')
udata/core/user/apiv2.py ADDED
@@ -0,0 +1,28 @@
+from flask_security import current_user
+
+from udata.api import apiv2, API
+from udata.core.topic.apiv2 import topic_page_fields
+from udata.core.topic.parsers import TopicApiParser
+from udata.models import Topic
+
+me = apiv2.namespace('me', 'Connected user related operations (v2)')
+
+# we will force include_private to True, no need for this arg
+topic_parser = TopicApiParser(with_include_private=False)
+
+
+@me.route('/org_topics/', endpoint='my_org_topics')
+class MyOrgTopicsAPI(API):
+    @apiv2.secure
+    @apiv2.doc('my_org_topics')
+    @apiv2.expect(topic_parser.parser)
+    @apiv2.marshal_list_with(topic_page_fields)
+    def get(self):
+        '''List all topics related to me and my organizations.'''
+        args = topic_parser.parse()
+        args["include_private"] = True
+        owners = list(current_user.organizations) + [current_user.id]
+        topics = Topic.objects.owned_by(*owners)
+        topics = topic_parser.parse_filters(topics, args)
+        sort = args['sort'] or ('$text_score' if args['q'] else None) or '-last-modified'
+        return topics.order_by(sort).paginate(args['page'], args['page_size'])
udata/db/__init__.py ADDED
File without changes
udata/db/tasks.py ADDED
@@ -0,0 +1,6 @@
+from udata.commands.db import check_references
+from udata.tasks import job
+
+@job('check-integrity')
+def check_integrity(self):
+    check_references()
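
Editor's note: if udata's standard job CLI applies here (an assumption; this diff only registers the task), the new integrity check could presumably be triggered like so:

# From the command line (assumed udata job CLI):
#   udata job run check-integrity
# Or from Python, by delaying the Celery task:
from udata.db.tasks import check_integrity
check_integrity.delay()  # runs check_references() asynchronously on a worker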
udata/features/notifications/__init__.py CHANGED
@@ -1,4 +1,3 @@
-from importlib import import_module
 
 import logging
 
udata/forms/fields.py CHANGED
@@ -206,8 +206,8 @@ class URLField(EmptyNone, Field):
         if self.data:
             try:
                 uris.validate(self.data)
-            except uris.ValidationError:
-                raise validators.ValidationError(_('Invalid URL'))
+            except uris.ValidationError as e:
+                raise validators.ValidationError(str(e))
         return True
 
     def process_formdata(self, valuelist):