PyPI - udata - Versions diffs - 14.4.1.dev7__py3-none-any.whl → 14.5.1.dev6__py3-none-any.whl - Mend

udata 14.4.1.dev7__py3-none-any.whl → 14.5.1.dev6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of udata might be problematic. Click here for more details.

Files changed (45) hide show

udata/api/__init__.py +2 -0
udata/auth/views.py +7 -3
udata/commands/dcat.py +1 -1
udata/core/dataservices/api.py +8 -1
udata/core/dataservices/apiv2.py +2 -5
udata/core/dataservices/models.py +4 -1
udata/core/dataservices/rdf.py +2 -1
udata/core/dataservices/tasks.py +6 -2
udata/core/dataset/api.py +8 -0
udata/core/dataset/models.py +4 -4
udata/core/dataset/rdf.py +8 -2
udata/core/dataset/tasks.py +6 -2
udata/core/discussions/api.py +15 -1
udata/core/discussions/models.py +5 -0
udata/core/legal/__init__.py +0 -0
udata/core/legal/mails.py +128 -0
udata/core/organization/api.py +8 -0
udata/core/organization/apiv2.py +2 -3
udata/core/organization/models.py +6 -1
udata/core/reuse/api.py +8 -0
udata/core/reuse/apiv2.py +2 -5
udata/core/topic/models.py +8 -2
udata/core/user/api.py +10 -3
udata/core/user/models.py +7 -1
udata/flask_mongoengine/pagination.py +1 -1
udata/harvest/backends/dcat.py +4 -1
udata/harvest/tests/test_dcat_backend.py +24 -0
udata/mail.py +14 -0
udata/rdf.py +13 -4
udata/settings.py +4 -0
udata/tests/api/test_datasets_api.py +6 -0
udata/tests/apiv2/test_search.py +30 -0
udata/tests/dataservice/test_dataservice_tasks.py +29 -0
udata/tests/dataset/test_dataset_tasks.py +25 -0
udata/tests/frontend/test_auth.py +34 -0
udata/tests/helpers.py +6 -0
udata/tests/search/test_search_integration.py +33 -0
udata/tests/test_api_fields.py +10 -0
udata/tests/test_legal_mails.py +359 -0
{udata-14.4.1.dev7.dist-info → udata-14.5.1.dev6.dist-info}/METADATA +2 -2
{udata-14.4.1.dev7.dist-info → udata-14.5.1.dev6.dist-info}/RECORD +45 -40
{udata-14.4.1.dev7.dist-info → udata-14.5.1.dev6.dist-info}/WHEEL +0 -0
{udata-14.4.1.dev7.dist-info → udata-14.5.1.dev6.dist-info}/entry_points.txt +0 -0
{udata-14.4.1.dev7.dist-info → udata-14.5.1.dev6.dist-info}/licenses/LICENSE +0 -0
{udata-14.4.1.dev7.dist-info → udata-14.5.1.dev6.dist-info}/top_level.txt +0 -0

udata/core/user/models.py CHANGED Viewed

@@ -18,6 +18,7 @@ from udata.core.discussions.models import Discussion
 from udata.core.linkable import Linkable
 from udata.core.storages import avatars, default_image_basename
 from udata.frontend.markdown import mdstrip
+from udata.i18n import lazy_gettext as _
 from udata.models import Follow, WithMetrics, db
 from udata.uris import cdata_url
@@ -62,7 +63,10 @@ class User(WithMetrics, UserMixin, Linkable, db.Document):
         db.ImageField(fs=avatars, basename=default_image_basename, thumbnails=AVATAR_SIZES)
     )
     website = field(db.URLField())
-    about = field(db.StringField())
+    about = field(
+        db.StringField(),
+        markdown=True,
+    )
     prefered_language = field(db.StringField())
@@ -116,6 +120,8 @@ class User(WithMetrics, UserMixin, Linkable, db.Document):
         "auto_create_index_on_save": True,
     }
+    verbose_name = _("account")
     __metrics_keys__ = [
         "datasets",
         "reuses",

udata/flask_mongoengine/pagination.py CHANGED Viewed

@@ -6,7 +6,7 @@ from mongoengine.queryset import QuerySet
 class Pagination(object):
     def __init__(self, iterable, page, per_page):
-        if page < 1:
+        if page < 1 or per_page < 1:
             abort(404)
         self.iterable = iterable

udata/harvest/backends/dcat.py CHANGED Viewed

@@ -225,7 +225,9 @@ class DcatBackend(BaseBackend):
         dataset = self.get_dataset(item.remote_id)
         remote_url_prefix = self.get_extra_config_value("remote_url_prefix")
-        dataset = dataset_from_rdf(page, dataset, node=node, remote_url_prefix=remote_url_prefix)
+        dataset = dataset_from_rdf(
+            page, dataset, node=node, remote_url_prefix=remote_url_prefix, dryrun=self.dryrun
+        )
         if dataset.organization:
             dataset.organization.compute_aggregate_metrics = False
             self.organizations_to_update.add(dataset.organization)
@@ -242,6 +244,7 @@ class DcatBackend(BaseBackend):
             node,
             [item.dataset for item in self.job.items],
             remote_url_prefix=remote_url_prefix,
+            dryrun=self.dryrun,
         )
     def get_node_from_item(self, graph, item):

udata/harvest/tests/test_dcat_backend.py CHANGED Viewed

@@ -972,6 +972,30 @@ class DcatBackendTest(PytestOnlyDBTestCase):
         assert "connection error" in mock_warning.call_args[0][0].lower()
         mock_exception.assert_not_called()
+    def test_preview_does_not_create_contact_points(self, rmock):
+        """Preview should not create ContactPoints in DB."""
+        from udata.core.contact_point.models import ContactPoint
+        LicenseFactory(id="lov2", title="Licence Ouverte Version 2.0")
+        LicenseFactory(id="lov1", title="Licence Ouverte Version 1.0")
+        url = mock_dcat(rmock, "catalog.xml", path="catalog.xml")
+        org = OrganizationFactory()
+        source = HarvestSourceFactory(backend="dcat", url=url, organization=org)
+        assert ContactPoint.objects.count() == 0
+        job = actions.preview(source)
+        assert job.status == "done"
+        assert len(job.items) == 4
+        # No ContactPoints should have been created in the database
+        assert ContactPoint.objects.count() == 0
+        # No datasets should have been created either
+        assert Dataset.objects.count() == 0
 @pytest.mark.options(HARVESTER_BACKENDS=["csw*"])
 class CswDcatBackendTest(PytestOnlyDBTestCase):

udata/mail.py CHANGED Viewed

@@ -39,6 +39,20 @@ class LabelledContent:
         )
+@dataclass
+class Link:
+    """Simple linkable object for use in ParagraphWithLinks"""
+    label: str
+    url: str
+    def __str__(self):
+        return str(self.label)
+    def url_for(self, **kwargs):
+        return self.url
 @dataclass
 class ParagraphWithLinks:
     paragraph: LazyString

udata/rdf.py CHANGED Viewed

@@ -367,7 +367,7 @@ def contact_point_name(agent_name: str | None, org_name: str | None) -> str:
     return agent_name or org_name or ""
-def contact_points_from_rdf(rdf, prop, role, dataset):
+def contact_points_from_rdf(rdf, prop, role, dataset, dryrun=False):
     if not dataset.organization and not dataset.owner:
         return
     for contact_point in rdf.objects(prop):
@@ -414,9 +414,18 @@ def contact_points_from_rdf(rdf, prop, role, dataset):
         else:
             org_or_owner = {"owner": dataset.owner}
         try:
-            contact, _ = ContactPoint.objects.get_or_create(
-                name=name, email=email, contact_form=contact_form, role=role, **org_or_owner
-            )
+            if dryrun:
+                # In dryrun mode, only reuse existing contact points, don't create new ones.
+                # Mongoengine doesn't allow referencing unsaved documents.
+                contact = ContactPoint.objects.filter(
+                    name=name, email=email, contact_form=contact_form, role=role, **org_or_owner
+                ).first()
+                if not contact:
+                    continue
+            else:
+                contact, _ = ContactPoint.objects.get_or_create(
+                    name=name, email=email, contact_form=contact_form, role=role, **org_or_owner
+                )
         except mongoengine.errors.ValidationError as validation_error:
             log.warning(f"Unable to validate contact point: {validation_error}", exc_info=True)
             continue

udata/settings.py CHANGED Viewed

@@ -174,6 +174,10 @@ class Defaults(object):
     SITE_AUTHOR = "Udata"
     SITE_GITHUB_URL = "https://github.com/etalab/udata"
+    TERMS_OF_USE_URL = None
+    TERMS_OF_USE_DELETION_ARTICLE = None
+    TELERECOURS_URL = None
     UDATA_INSTANCE_NAME = "udata"
     HARVESTER_BACKENDS = []

udata/tests/api/test_datasets_api.py CHANGED Viewed

@@ -1706,6 +1706,12 @@ class DatasetResourceAPITest(APITestCase):
         self.dataset.reload()
         self.assertEqual(len(self.dataset.resources), 2)
+    def test_create_with_list_returns_400(self):
+        """It should return 400 when sending a list instead of a dict"""
+        data = [ResourceFactory.as_dict()]
+        response = self.post(url_for("api.resources", dataset=self.dataset), data)
+        self.assert400(response)
     def test_create_with_file(self):
         """It should create a resource from the API with a file"""
         user = self.login()

udata/tests/apiv2/test_search.py ADDED Viewed

@@ -0,0 +1,30 @@
+from udata.core.dataservices.factories import DataserviceFactory
+from udata.core.organization.factories import OrganizationFactory
+from udata.core.reuse.factories import ReuseFactory
+from udata.tests.api import APITestCase
+class SearchAPIV2Test(APITestCase):
+    def test_dataservice_search_with_model_query_param(self):
+        """Searching dataservices with 'model' as query param should not crash.
+        Regression test for: TypeError: query() got multiple values for argument 'model'
+        """
+        DataserviceFactory.create_batch(3)
+        response = self.get("/api/2/dataservices/search/?model=malicious")
+        self.assert200(response)
+    def test_reuse_search_with_model_query_param(self):
+        """Searching reuses with 'model' as query param should not crash."""
+        ReuseFactory.create_batch(3)
+        response = self.get("/api/2/reuses/search/?model=malicious")
+        self.assert200(response)
+    def test_organization_search_with_model_query_param(self):
+        """Searching organizations with 'model' as query param should not crash."""
+        OrganizationFactory.create_batch(3)
+        response = self.get("/api/2/organizations/search/?model=malicious")
+        self.assert200(response)

udata/tests/dataservice/test_dataservice_tasks.py CHANGED Viewed

@@ -43,3 +43,32 @@ class DataserviceTasksTest(PytestOnlyDBTestCase):
         assert Discussion.objects.filter(id=discussion.id).count() == 0
         assert Follow.objects.filter(id=follower.id).count() == 0
         assert HarvestJob.objects.filter(items__dataservice=dataservices[0].id).count() == 0
+    def test_purge_dataservices_cleans_all_harvest_items_references(self):
+        """Test that purging dataservices cleans all HarvestItem references in a job.
+        The same dataservice can appear multiple times in a job's items (e.g. if the
+        harvest source has duplicates). The $ operator only updates the first match,
+        so we need to use $[] with array_filters to update all matches.
+        """
+        dataservice_to_delete = Dataservice.objects.create(
+            title="delete me", base_api_url="https://example.com/api", deleted_at="2016-01-01"
+        )
+        dataservice_keep = Dataservice.objects.create(
+            title="keep me", base_api_url="https://example.com/api"
+        )
+        job = HarvestJobFactory(
+            items=[
+                HarvestItem(dataservice=dataservice_to_delete, remote_id="1"),
+                HarvestItem(dataservice=dataservice_keep, remote_id="2"),
+                HarvestItem(dataservice=dataservice_to_delete, remote_id="3"),
+            ]
+        )
+        tasks.purge_dataservices()
+        job.reload()
+        assert job.items[0].dataservice is None
+        assert job.items[1].dataservice == dataservice_keep
+        assert job.items[2].dataservice is None

udata/tests/dataset/test_dataset_tasks.py CHANGED Viewed

@@ -60,6 +60,31 @@ class DatasetTasksTest(PytestOnlyDBTestCase):
         assert HarvestJob.objects.filter(items__dataset=datasets[0].id).count() == 0
         assert Dataservice.objects.filter(datasets=datasets[0].id).count() == 0
+    def test_purge_datasets_cleans_all_harvest_items_references(self):
+        """Test that purging datasets cleans all HarvestItem references in a job.
+        The same dataset can appear multiple times in a job's items (e.g. if the
+        harvest source has duplicates). The $ operator only updates the first match,
+        so we need to use $[] with array_filters to update all matches.
+        """
+        dataset_to_delete = Dataset.objects.create(title="delete me", deleted="2016-01-01")
+        dataset_keep = Dataset.objects.create(title="keep me")
+        job = HarvestJobFactory(
+            items=[
+                HarvestItem(dataset=dataset_to_delete, remote_id="1"),
+                HarvestItem(dataset=dataset_keep, remote_id="2"),
+                HarvestItem(dataset=dataset_to_delete, remote_id="3"),
+            ]
+        )
+        tasks.purge_datasets()
+        job.reload()
+        assert job.items[0].dataset is None
+        assert job.items[1].dataset == dataset_keep
+        assert job.items[2].dataset is None
     def test_purge_datasets_community(self):
         dataset = Dataset.objects.create(title="delete me", deleted="2016-01-01")
         community_resource1 = CommunityResourceFactory()

udata/tests/frontend/test_auth.py CHANGED Viewed

@@ -45,3 +45,37 @@ class AuthTest(APITestCase):
         # Email should not have changed
         user.reload()
         assert user.email == original_email
+    def test_change_mail_after_password_change(self):
+        """Changing password rotates fs_uniquifier and invalidates email change token"""
+        user = UserFactory(password="Password123")
+        self.login(user)
+        old_uniquifier = user.fs_uniquifier
+        new_email = "new@example.com"
+        security = current_app.extensions["security"]
+        data = [str(user.fs_uniquifier), hash_data(user.email), new_email]
+        token = security.confirm_serializer.dumps(data)
+        confirmation_link = url_for("security.confirm_change_email", token=token)
+        # Change password via API
+        resp = self.post(
+            url_for("security.change_password"),
+            {
+                "password": "Password123",
+                "new_password": "NewPassword456",
+                "new_password_confirm": "NewPassword456",
+                "submit": True,
+            },
+        )
+        assert resp.status_code == 200, f"Password change failed: {resp.data}"
+        user.reload()
+        assert user.fs_uniquifier != old_uniquifier, "fs_uniquifier should have changed"
+        # Now try to use the email change link - should fail
+        resp = self.get(confirmation_link)
+        assert resp.status_code == 302
+        assert "change_email_invalid" in resp.location

udata/tests/helpers.py CHANGED Viewed

@@ -4,6 +4,7 @@ from datetime import timedelta
 from io import BytesIO
 from urllib.parse import parse_qs, urlparse
+import pytest
 from flask import current_app, json
 from flask_security.babel import FsDomain
 from PIL import Image
@@ -11,6 +12,11 @@ from PIL import Image
 from udata.core.spatial.factories import GeoZoneFactory
 from udata.mail import mail_sent
+requires_search_service = pytest.mark.skipif(
+    not os.environ.get("UDATA_TEST_SEARCH_INTEGRATION"),
+    reason="Set UDATA_TEST_SEARCH_INTEGRATION=1 to run search integration tests",
+)
 def assert_equal_dates(datetime1, datetime2, limit=1):  # Seconds.
     """Lax date comparison, avoid comparing milliseconds and seconds."""

udata/tests/search/test_search_integration.py ADDED Viewed

@@ -0,0 +1,33 @@
+import time
+import pytest
+from udata.core.dataset.factories import DatasetFactory
+from udata.tests.api import APITestCase
+from udata.tests.helpers import requires_search_service
+@requires_search_service
+@pytest.mark.options(SEARCH_SERVICE_API_URL="http://localhost:5000/api/1/", AUTO_INDEX=True)
+class SearchIntegrationTest(APITestCase):
+    """Integration tests that require a running search-service and Elasticsearch."""
+    def test_dataset_fuzzy_search(self):
+        """
+        Test that Elasticsearch fuzzy search works.
+        A typo in the search query ("spectakulaire" instead of "spectaculaire")
+        should still find the dataset thanks to ES fuzzy matching.
+        """
+        DatasetFactory(title="Données spectaculaires sur les transports")
+        # Small delay to let ES index the document
+        time.sleep(1)
+        # Search with a typo - only ES fuzzy search can handle this
+        response = self.get("/api/2/datasets/search/?q=spectakulaire")
+        self.assert200(response)
+        assert response.json["total"] >= 1
+        titles = [d["title"] for d in response.json["data"]]
+        assert "Données spectaculaires sur les transports" in titles

udata/tests/test_api_fields.py CHANGED Viewed

@@ -354,3 +354,13 @@ class ApplyPaginationTest(PytestOnlyDBTestCase):
             results: DBPaginator = Fake.apply_pagination(Fake.apply_sort_filters(Fake.objects))
             assert results.page_size == 5
             assert results.page == 3
+    def test_negative_page_size_returns_404(self, app) -> None:
+        """Negative page_size should return a 404 error."""
+        from werkzeug.exceptions import NotFound
+        FakeFactory()
+        with app.test_request_context("/foobar", query_string={"page": 1, "page_size": -5}):
+            with pytest.raises(NotFound):
+                Fake.apply_pagination(Fake.apply_sort_filters(Fake.objects))

udata 14.4.1.dev7__py3-none-any.whl → 14.5.1.dev6__py3-none-any.whl

Potentially problematic release.

udata 14.4.1.dev7__py3-none-any.whl → 14.5.1.dev6__py3-none-any.whl