udata 14.4.1.dev7__py3-none-any.whl → 14.5.1.dev6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of udata might be problematic. Click here for more details.

Files changed (45)
  1. udata/api/__init__.py +2 -0
  2. udata/auth/views.py +7 -3
  3. udata/commands/dcat.py +1 -1
  4. udata/core/dataservices/api.py +8 -1
  5. udata/core/dataservices/apiv2.py +2 -5
  6. udata/core/dataservices/models.py +4 -1
  7. udata/core/dataservices/rdf.py +2 -1
  8. udata/core/dataservices/tasks.py +6 -2
  9. udata/core/dataset/api.py +8 -0
  10. udata/core/dataset/models.py +4 -4
  11. udata/core/dataset/rdf.py +8 -2
  12. udata/core/dataset/tasks.py +6 -2
  13. udata/core/discussions/api.py +15 -1
  14. udata/core/discussions/models.py +5 -0
  15. udata/core/legal/__init__.py +0 -0
  16. udata/core/legal/mails.py +128 -0
  17. udata/core/organization/api.py +8 -0
  18. udata/core/organization/apiv2.py +2 -3
  19. udata/core/organization/models.py +6 -1
  20. udata/core/reuse/api.py +8 -0
  21. udata/core/reuse/apiv2.py +2 -5
  22. udata/core/topic/models.py +8 -2
  23. udata/core/user/api.py +10 -3
  24. udata/core/user/models.py +7 -1
  25. udata/flask_mongoengine/pagination.py +1 -1
  26. udata/harvest/backends/dcat.py +4 -1
  27. udata/harvest/tests/test_dcat_backend.py +24 -0
  28. udata/mail.py +14 -0
  29. udata/rdf.py +13 -4
  30. udata/settings.py +4 -0
  31. udata/tests/api/test_datasets_api.py +6 -0
  32. udata/tests/apiv2/test_search.py +30 -0
  33. udata/tests/dataservice/test_dataservice_tasks.py +29 -0
  34. udata/tests/dataset/test_dataset_tasks.py +25 -0
  35. udata/tests/frontend/test_auth.py +34 -0
  36. udata/tests/helpers.py +6 -0
  37. udata/tests/search/test_search_integration.py +33 -0
  38. udata/tests/test_api_fields.py +10 -0
  39. udata/tests/test_legal_mails.py +359 -0
  40. {udata-14.4.1.dev7.dist-info → udata-14.5.1.dev6.dist-info}/METADATA +2 -2
  41. {udata-14.4.1.dev7.dist-info → udata-14.5.1.dev6.dist-info}/RECORD +45 -40
  42. {udata-14.4.1.dev7.dist-info → udata-14.5.1.dev6.dist-info}/WHEEL +0 -0
  43. {udata-14.4.1.dev7.dist-info → udata-14.5.1.dev6.dist-info}/entry_points.txt +0 -0
  44. {udata-14.4.1.dev7.dist-info → udata-14.5.1.dev6.dist-info}/licenses/LICENSE +0 -0
  45. {udata-14.4.1.dev7.dist-info → udata-14.5.1.dev6.dist-info}/top_level.txt +0 -0
udata/core/user/models.py CHANGED
@@ -18,6 +18,7 @@ from udata.core.discussions.models import Discussion
18
18
  from udata.core.linkable import Linkable
19
19
  from udata.core.storages import avatars, default_image_basename
20
20
  from udata.frontend.markdown import mdstrip
21
+ from udata.i18n import lazy_gettext as _
21
22
  from udata.models import Follow, WithMetrics, db
22
23
  from udata.uris import cdata_url
23
24
 
@@ -62,7 +63,10 @@ class User(WithMetrics, UserMixin, Linkable, db.Document):
62
63
  db.ImageField(fs=avatars, basename=default_image_basename, thumbnails=AVATAR_SIZES)
63
64
  )
64
65
  website = field(db.URLField())
65
- about = field(db.StringField())
66
+ about = field(
67
+ db.StringField(),
68
+ markdown=True,
69
+ )
66
70
 
67
71
  prefered_language = field(db.StringField())
68
72
 
@@ -116,6 +120,8 @@ class User(WithMetrics, UserMixin, Linkable, db.Document):
116
120
  "auto_create_index_on_save": True,
117
121
  }
118
122
 
123
+ verbose_name = _("account")
124
+
119
125
  __metrics_keys__ = [
120
126
  "datasets",
121
127
  "reuses",
udata/flask_mongoengine/pagination.py CHANGED
@@ -6,7 +6,7 @@ from mongoengine.queryset import QuerySet
6
6
 
7
7
  class Pagination(object):
8
8
  def __init__(self, iterable, page, per_page):
9
- if page < 1:
9
+ if page < 1 or per_page < 1:
10
10
  abort(404)
11
11
 
12
12
  self.iterable = iterable
udata/harvest/backends/dcat.py CHANGED
@@ -225,7 +225,9 @@ class DcatBackend(BaseBackend):
225
225
 
226
226
  dataset = self.get_dataset(item.remote_id)
227
227
  remote_url_prefix = self.get_extra_config_value("remote_url_prefix")
228
- dataset = dataset_from_rdf(page, dataset, node=node, remote_url_prefix=remote_url_prefix)
228
+ dataset = dataset_from_rdf(
229
+ page, dataset, node=node, remote_url_prefix=remote_url_prefix, dryrun=self.dryrun
230
+ )
229
231
  if dataset.organization:
230
232
  dataset.organization.compute_aggregate_metrics = False
231
233
  self.organizations_to_update.add(dataset.organization)
@@ -242,6 +244,7 @@ class DcatBackend(BaseBackend):
242
244
  node,
243
245
  [item.dataset for item in self.job.items],
244
246
  remote_url_prefix=remote_url_prefix,
247
+ dryrun=self.dryrun,
245
248
  )
246
249
 
247
250
  def get_node_from_item(self, graph, item):
udata/harvest/tests/test_dcat_backend.py CHANGED
@@ -972,6 +972,30 @@ class DcatBackendTest(PytestOnlyDBTestCase):
972
972
  assert "connection error" in mock_warning.call_args[0][0].lower()
973
973
  mock_exception.assert_not_called()
974
974
 
975
+ def test_preview_does_not_create_contact_points(self, rmock):
976
+ """Preview should not create ContactPoints in DB."""
977
+ from udata.core.contact_point.models import ContactPoint
978
+
979
+ LicenseFactory(id="lov2", title="Licence Ouverte Version 2.0")
980
+ LicenseFactory(id="lov1", title="Licence Ouverte Version 1.0")
981
+
982
+ url = mock_dcat(rmock, "catalog.xml", path="catalog.xml")
983
+ org = OrganizationFactory()
984
+ source = HarvestSourceFactory(backend="dcat", url=url, organization=org)
985
+
986
+ assert ContactPoint.objects.count() == 0
987
+
988
+ job = actions.preview(source)
989
+
990
+ assert job.status == "done"
991
+ assert len(job.items) == 4
992
+
993
+ # No ContactPoints should have been created in the database
994
+ assert ContactPoint.objects.count() == 0
995
+
996
+ # No datasets should have been created either
997
+ assert Dataset.objects.count() == 0
998
+
975
999
 
976
1000
  @pytest.mark.options(HARVESTER_BACKENDS=["csw*"])
977
1001
  class CswDcatBackendTest(PytestOnlyDBTestCase):
udata/mail.py CHANGED
@@ -39,6 +39,20 @@ class LabelledContent:
39
39
  )
40
40
 
41
41
 
42
+ @dataclass
43
+ class Link:
44
+ """Simple linkable object for use in ParagraphWithLinks"""
45
+
46
+ label: str
47
+ url: str
48
+
49
+ def __str__(self):
50
+ return str(self.label)
51
+
52
+ def url_for(self, **kwargs):
53
+ return self.url
54
+
55
+
42
56
  @dataclass
43
57
  class ParagraphWithLinks:
44
58
  paragraph: LazyString
udata/rdf.py CHANGED
@@ -367,7 +367,7 @@ def contact_point_name(agent_name: str | None, org_name: str | None) -> str:
367
367
  return agent_name or org_name or ""
368
368
 
369
369
 
370
- def contact_points_from_rdf(rdf, prop, role, dataset):
370
+ def contact_points_from_rdf(rdf, prop, role, dataset, dryrun=False):
371
371
  if not dataset.organization and not dataset.owner:
372
372
  return
373
373
  for contact_point in rdf.objects(prop):
@@ -414,9 +414,18 @@ def contact_points_from_rdf(rdf, prop, role, dataset):
414
414
  else:
415
415
  org_or_owner = {"owner": dataset.owner}
416
416
  try:
417
- contact, _ = ContactPoint.objects.get_or_create(
418
- name=name, email=email, contact_form=contact_form, role=role, **org_or_owner
419
- )
417
+ if dryrun:
418
+ # In dryrun mode, only reuse existing contact points, don't create new ones.
419
+ # Mongoengine doesn't allow referencing unsaved documents.
420
+ contact = ContactPoint.objects.filter(
421
+ name=name, email=email, contact_form=contact_form, role=role, **org_or_owner
422
+ ).first()
423
+ if not contact:
424
+ continue
425
+ else:
426
+ contact, _ = ContactPoint.objects.get_or_create(
427
+ name=name, email=email, contact_form=contact_form, role=role, **org_or_owner
428
+ )
420
429
  except mongoengine.errors.ValidationError as validation_error:
421
430
  log.warning(f"Unable to validate contact point: {validation_error}", exc_info=True)
422
431
  continue
udata/settings.py CHANGED
@@ -174,6 +174,10 @@ class Defaults(object):
174
174
  SITE_AUTHOR = "Udata"
175
175
  SITE_GITHUB_URL = "https://github.com/etalab/udata"
176
176
 
177
+ TERMS_OF_USE_URL = None
178
+ TERMS_OF_USE_DELETION_ARTICLE = None
179
+ TELERECOURS_URL = None
180
+
177
181
  UDATA_INSTANCE_NAME = "udata"
178
182
 
179
183
  HARVESTER_BACKENDS = []
udata/tests/api/test_datasets_api.py CHANGED
@@ -1706,6 +1706,12 @@ class DatasetResourceAPITest(APITestCase):
1706
1706
  self.dataset.reload()
1707
1707
  self.assertEqual(len(self.dataset.resources), 2)
1708
1708
 
1709
+ def test_create_with_list_returns_400(self):
1710
+ """It should return 400 when sending a list instead of a dict"""
1711
+ data = [ResourceFactory.as_dict()]
1712
+ response = self.post(url_for("api.resources", dataset=self.dataset), data)
1713
+ self.assert400(response)
1714
+
1709
1715
  def test_create_with_file(self):
1710
1716
  """It should create a resource from the API with a file"""
1711
1717
  user = self.login()
udata/tests/apiv2/test_search.py ADDED
@@ -0,0 +1,30 @@
1
+ from udata.core.dataservices.factories import DataserviceFactory
2
+ from udata.core.organization.factories import OrganizationFactory
3
+ from udata.core.reuse.factories import ReuseFactory
4
+ from udata.tests.api import APITestCase
5
+
6
+
7
+ class SearchAPIV2Test(APITestCase):
8
+ def test_dataservice_search_with_model_query_param(self):
9
+ """Searching dataservices with 'model' as query param should not crash.
10
+
11
+ Regression test for: TypeError: query() got multiple values for argument 'model'
12
+ """
13
+ DataserviceFactory.create_batch(3)
14
+
15
+ response = self.get("/api/2/dataservices/search/?model=malicious")
16
+ self.assert200(response)
17
+
18
+ def test_reuse_search_with_model_query_param(self):
19
+ """Searching reuses with 'model' as query param should not crash."""
20
+ ReuseFactory.create_batch(3)
21
+
22
+ response = self.get("/api/2/reuses/search/?model=malicious")
23
+ self.assert200(response)
24
+
25
+ def test_organization_search_with_model_query_param(self):
26
+ """Searching organizations with 'model' as query param should not crash."""
27
+ OrganizationFactory.create_batch(3)
28
+
29
+ response = self.get("/api/2/organizations/search/?model=malicious")
30
+ self.assert200(response)
udata/tests/dataservice/test_dataservice_tasks.py CHANGED
@@ -43,3 +43,32 @@ class DataserviceTasksTest(PytestOnlyDBTestCase):
43
43
  assert Discussion.objects.filter(id=discussion.id).count() == 0
44
44
  assert Follow.objects.filter(id=follower.id).count() == 0
45
45
  assert HarvestJob.objects.filter(items__dataservice=dataservices[0].id).count() == 0
46
+
47
+ def test_purge_dataservices_cleans_all_harvest_items_references(self):
48
+ """Test that purging dataservices cleans all HarvestItem references in a job.
49
+
50
+ The same dataservice can appear multiple times in a job's items (e.g. if the
51
+ harvest source has duplicates). The $ operator only updates the first match,
52
+ so we need to use $[] with array_filters to update all matches.
53
+ """
54
+ dataservice_to_delete = Dataservice.objects.create(
55
+ title="delete me", base_api_url="https://example.com/api", deleted_at="2016-01-01"
56
+ )
57
+ dataservice_keep = Dataservice.objects.create(
58
+ title="keep me", base_api_url="https://example.com/api"
59
+ )
60
+
61
+ job = HarvestJobFactory(
62
+ items=[
63
+ HarvestItem(dataservice=dataservice_to_delete, remote_id="1"),
64
+ HarvestItem(dataservice=dataservice_keep, remote_id="2"),
65
+ HarvestItem(dataservice=dataservice_to_delete, remote_id="3"),
66
+ ]
67
+ )
68
+
69
+ tasks.purge_dataservices()
70
+
71
+ job.reload()
72
+ assert job.items[0].dataservice is None
73
+ assert job.items[1].dataservice == dataservice_keep
74
+ assert job.items[2].dataservice is None
udata/tests/dataset/test_dataset_tasks.py CHANGED
@@ -60,6 +60,31 @@ class DatasetTasksTest(PytestOnlyDBTestCase):
60
60
  assert HarvestJob.objects.filter(items__dataset=datasets[0].id).count() == 0
61
61
  assert Dataservice.objects.filter(datasets=datasets[0].id).count() == 0
62
62
 
63
+ def test_purge_datasets_cleans_all_harvest_items_references(self):
64
+ """Test that purging datasets cleans all HarvestItem references in a job.
65
+
66
+ The same dataset can appear multiple times in a job's items (e.g. if the
67
+ harvest source has duplicates). The $ operator only updates the first match,
68
+ so we need to use $[] with array_filters to update all matches.
69
+ """
70
+ dataset_to_delete = Dataset.objects.create(title="delete me", deleted="2016-01-01")
71
+ dataset_keep = Dataset.objects.create(title="keep me")
72
+
73
+ job = HarvestJobFactory(
74
+ items=[
75
+ HarvestItem(dataset=dataset_to_delete, remote_id="1"),
76
+ HarvestItem(dataset=dataset_keep, remote_id="2"),
77
+ HarvestItem(dataset=dataset_to_delete, remote_id="3"),
78
+ ]
79
+ )
80
+
81
+ tasks.purge_datasets()
82
+
83
+ job.reload()
84
+ assert job.items[0].dataset is None
85
+ assert job.items[1].dataset == dataset_keep
86
+ assert job.items[2].dataset is None
87
+
63
88
  def test_purge_datasets_community(self):
64
89
  dataset = Dataset.objects.create(title="delete me", deleted="2016-01-01")
65
90
  community_resource1 = CommunityResourceFactory()
udata/tests/frontend/test_auth.py CHANGED
@@ -45,3 +45,37 @@ class AuthTest(APITestCase):
45
45
  # Email should not have changed
46
46
  user.reload()
47
47
  assert user.email == original_email
48
+
49
+ def test_change_mail_after_password_change(self):
50
+ """Changing password rotates fs_uniquifier and invalidates email change token"""
51
+ user = UserFactory(password="Password123")
52
+ self.login(user)
53
+ old_uniquifier = user.fs_uniquifier
54
+
55
+ new_email = "new@example.com"
56
+
57
+ security = current_app.extensions["security"]
58
+
59
+ data = [str(user.fs_uniquifier), hash_data(user.email), new_email]
60
+ token = security.confirm_serializer.dumps(data)
61
+ confirmation_link = url_for("security.confirm_change_email", token=token)
62
+
63
+ # Change password via API
64
+ resp = self.post(
65
+ url_for("security.change_password"),
66
+ {
67
+ "password": "Password123",
68
+ "new_password": "NewPassword456",
69
+ "new_password_confirm": "NewPassword456",
70
+ "submit": True,
71
+ },
72
+ )
73
+ assert resp.status_code == 200, f"Password change failed: {resp.data}"
74
+
75
+ user.reload()
76
+ assert user.fs_uniquifier != old_uniquifier, "fs_uniquifier should have changed"
77
+
78
+ # Now try to use the email change link - should fail
79
+ resp = self.get(confirmation_link)
80
+ assert resp.status_code == 302
81
+ assert "change_email_invalid" in resp.location
udata/tests/helpers.py CHANGED
@@ -4,6 +4,7 @@ from datetime import timedelta
4
4
  from io import BytesIO
5
5
  from urllib.parse import parse_qs, urlparse
6
6
 
7
+ import pytest
7
8
  from flask import current_app, json
8
9
  from flask_security.babel import FsDomain
9
10
  from PIL import Image
@@ -11,6 +12,11 @@ from PIL import Image
11
12
  from udata.core.spatial.factories import GeoZoneFactory
12
13
  from udata.mail import mail_sent
13
14
 
15
+ requires_search_service = pytest.mark.skipif(
16
+ not os.environ.get("UDATA_TEST_SEARCH_INTEGRATION"),
17
+ reason="Set UDATA_TEST_SEARCH_INTEGRATION=1 to run search integration tests",
18
+ )
19
+
14
20
 
15
21
  def assert_equal_dates(datetime1, datetime2, limit=1): # Seconds.
16
22
  """Lax date comparison, avoid comparing milliseconds and seconds."""
udata/tests/search/test_search_integration.py ADDED
@@ -0,0 +1,33 @@
1
+ import time
2
+
3
+ import pytest
4
+
5
+ from udata.core.dataset.factories import DatasetFactory
6
+ from udata.tests.api import APITestCase
7
+ from udata.tests.helpers import requires_search_service
8
+
9
+
10
+ @requires_search_service
11
+ @pytest.mark.options(SEARCH_SERVICE_API_URL="http://localhost:5000/api/1/", AUTO_INDEX=True)
12
+ class SearchIntegrationTest(APITestCase):
13
+ """Integration tests that require a running search-service and Elasticsearch."""
14
+
15
+ def test_dataset_fuzzy_search(self):
16
+ """
17
+ Test that Elasticsearch fuzzy search works.
18
+
19
+ A typo in the search query ("spectakulaire" instead of "spectaculaire")
20
+ should still find the dataset thanks to ES fuzzy matching.
21
+ """
22
+ DatasetFactory(title="Données spectaculaires sur les transports")
23
+
24
+ # Small delay to let ES index the document
25
+ time.sleep(1)
26
+
27
+ # Search with a typo - only ES fuzzy search can handle this
28
+ response = self.get("/api/2/datasets/search/?q=spectakulaire")
29
+ self.assert200(response)
30
+ assert response.json["total"] >= 1
31
+
32
+ titles = [d["title"] for d in response.json["data"]]
33
+ assert "Données spectaculaires sur les transports" in titles
udata/tests/test_api_fields.py CHANGED
@@ -354,3 +354,13 @@ class ApplyPaginationTest(PytestOnlyDBTestCase):
354
354
  results: DBPaginator = Fake.apply_pagination(Fake.apply_sort_filters(Fake.objects))
355
355
  assert results.page_size == 5
356
356
  assert results.page == 3
357
+
358
+ def test_negative_page_size_returns_404(self, app) -> None:
359
+ """Negative page_size should return a 404 error."""
360
+ from werkzeug.exceptions import NotFound
361
+
362
+ FakeFactory()
363
+
364
+ with app.test_request_context("/foobar", query_string={"page": 1, "page_size": -5}):
365
+ with pytest.raises(NotFound):
366
+ Fake.apply_pagination(Fake.apply_sort_filters(Fake.objects))