udata 10.8.2.dev36980__py2.py3-none-any.whl → 10.8.2.dev37076__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of udata might be problematic. Click here for more details.

Files changed (30) hide show
  1. udata/commands/db.py +22 -9
  2. udata/core/dataset/models.py +5 -3
  3. udata/harvest/tests/ckan/test_ckan_backend.py +1 -1
  4. udata/migrations/2025-07-30-purge-old-harvest-dynamic-fields.py +29 -0
  5. udata/static/chunks/{10.471164b2a9fe15614797.js → 10.8ca60413647062717b1e.js} +3 -3
  6. udata/static/chunks/{10.471164b2a9fe15614797.js.map → 10.8ca60413647062717b1e.js.map} +1 -1
  7. udata/static/chunks/{11.55ab79044cda0271b595.js → 11.b6f741fcc366abfad9c4.js} +3 -3
  8. udata/static/chunks/{11.55ab79044cda0271b595.js.map → 11.b6f741fcc366abfad9c4.js.map} +1 -1
  9. udata/static/chunks/{13.f29411b06be1883356a3.js → 13.2d06442dd9a05d9777b5.js} +2 -2
  10. udata/static/chunks/{13.f29411b06be1883356a3.js.map → 13.2d06442dd9a05d9777b5.js.map} +1 -1
  11. udata/static/chunks/{17.3bd0340930d4a314ce9c.js → 17.e8e4caaad5cb0cc0bacc.js} +2 -2
  12. udata/static/chunks/{17.3bd0340930d4a314ce9c.js.map → 17.e8e4caaad5cb0cc0bacc.js.map} +1 -1
  13. udata/static/chunks/{19.3e0e8651d948e04b8cf2.js → 19.f03a102365af4315f9db.js} +3 -3
  14. udata/static/chunks/{19.3e0e8651d948e04b8cf2.js.map → 19.f03a102365af4315f9db.js.map} +1 -1
  15. udata/static/chunks/{8.494b003a94383b142c18.js → 8.778091d55cd8ea39af6b.js} +2 -2
  16. udata/static/chunks/{8.494b003a94383b142c18.js.map → 8.778091d55cd8ea39af6b.js.map} +1 -1
  17. udata/static/chunks/{9.07515e5187f475bce828.js → 9.033d7e190ca9e226a5d0.js} +3 -3
  18. udata/static/chunks/{9.07515e5187f475bce828.js.map → 9.033d7e190ca9e226a5d0.js.map} +1 -1
  19. udata/static/common.js +1 -1
  20. udata/static/common.js.map +1 -1
  21. udata/tests/api/test_datasets_api.py +0 -46
  22. udata/tests/cli/test_db_cli.py +12 -0
  23. udata/tests/dataset/test_dataset_model.py +0 -16
  24. {udata-10.8.2.dev36980.dist-info → udata-10.8.2.dev37076.dist-info}/METADATA +3 -1
  25. {udata-10.8.2.dev36980.dist-info → udata-10.8.2.dev37076.dist-info}/RECORD +29 -29
  26. udata/harvest/backends/ckan/models.py +0 -10
  27. {udata-10.8.2.dev36980.dist-info → udata-10.8.2.dev37076.dist-info}/LICENSE +0 -0
  28. {udata-10.8.2.dev36980.dist-info → udata-10.8.2.dev37076.dist-info}/WHEEL +0 -0
  29. {udata-10.8.2.dev36980.dist-info → udata-10.8.2.dev37076.dist-info}/entry_points.txt +0 -0
  30. {udata-10.8.2.dev36980.dist-info → udata-10.8.2.dev37076.dist-info}/top_level.txt +0 -0
udata/commands/db.py CHANGED
@@ -2,6 +2,7 @@ import collections
2
2
  import copy
3
3
  import logging
4
4
  import os
5
+ import sys
5
6
  import traceback
6
7
  from itertools import groupby
7
8
  from typing import Optional
@@ -312,15 +313,26 @@ def check_references(models_to_check):
312
313
  f"\t{model.__name__}#{obj.id} have a broken reference for `{reference['name']}`"
313
314
  )
314
315
  elif reference["type"] == "list":
315
- attr_list = getattr(obj, reference["name"], [])
316
- for i, sub in enumerate(attr_list):
317
- # If it's still an instance of DBRef it means that it failed to
318
- # dereference the ID.
319
- if isinstance(sub, DBRef):
320
- errors[model][key] += 1
321
- print_and_save(
322
- f"\t{model.__name__}#{obj.id} have a broken reference for {reference['name']}[{i}]"
323
- )
316
+ field_exists = (
317
+ f"{reference['name']}__exists" # Eg: "contact_points__exists"
318
+ )
319
+ if model.objects(id=obj.id, **{field_exists: True}).count() == 0:
320
+ # See https://github.com/MongoEngine/mongoengine/issues/267#issuecomment-283065318
321
+ # Setting it explicitly to an empty list actually removes the field, which it shouldn't.
322
+ errors[model][key] += 1
323
+ print_and_save(
324
+ f"\t{model.__name__}#{obj.id} have a non existing field `{reference['name']}`, instead of an empty list"
325
+ )
326
+ else:
327
+ attr_list = getattr(obj, reference["name"])
328
+ for i, sub in enumerate(attr_list):
329
+ # If it's still an instance of DBRef it means that it failed to
330
+ # dereference the ID.
331
+ if isinstance(sub, DBRef):
332
+ errors[model][key] += 1
333
+ print_and_save(
334
+ f"\t{model.__name__}#{obj.id} have a broken reference for {reference['name']}[{i}]"
335
+ )
324
336
  elif reference["type"] == "embed_list":
325
337
  p1, p2 = reference["name"].split("__")
326
338
  attr_list = getattr(obj, p1, [])
@@ -380,6 +392,7 @@ def check_references(models_to_check):
380
392
  sentry_sdk.capture_message(f"{total} integrity errors", "fatal")
381
393
  except ImportError:
382
394
  print("`sentry_sdk` not installed. The errors weren't reported")
395
+ sys.exit(1)
383
396
 
384
397
 
385
398
  @grp.command()
udata/core/dataset/models.py CHANGED
@@ -10,7 +10,6 @@ import requests
10
10
  from blinker import signal
11
11
  from dateutil.parser import parse as parse_dt
12
12
  from flask import current_app, url_for
13
- from mongoengine import DynamicEmbeddedDocument
14
13
  from mongoengine import ValidationError as MongoEngineValidationError
15
14
  from mongoengine.fields import DateTimeField
16
15
  from mongoengine.signals import post_save, pre_init, pre_save
@@ -78,7 +77,7 @@ def get_json_ld_extra(key, value):
78
77
  }
79
78
 
80
79
 
81
- class HarvestDatasetMetadata(DynamicEmbeddedDocument):
80
+ class HarvestDatasetMetadata(db.EmbeddedDocument):
82
81
  backend = db.StringField()
83
82
  created_at = db.DateTimeField()
84
83
  modified_at = db.DateTimeField()
@@ -91,12 +90,15 @@ class HarvestDatasetMetadata(DynamicEmbeddedDocument):
91
90
  dct_identifier = db.StringField()
92
91
  archived_at = db.DateTimeField()
93
92
  archived = db.StringField()
93
+ ckan_name = db.StringField()
94
+ ckan_source = db.StringField()
94
95
 
95
96
 
96
- class HarvestResourceMetadata(DynamicEmbeddedDocument):
97
+ class HarvestResourceMetadata(db.EmbeddedDocument):
97
98
  created_at = db.DateTimeField()
98
99
  modified_at = db.DateTimeField()
99
100
  uri = db.StringField()
101
+ dct_identifier = db.StringField()
100
102
 
101
103
 
102
104
  class Schema(db.EmbeddedDocument):
udata/harvest/tests/ckan/test_ckan_backend.py CHANGED
@@ -460,7 +460,7 @@ def test_skip_no_resources(source, result):
460
460
  def test_ckan_url_is_url(data, result):
461
461
  dataset = dataset_for(result)
462
462
  assert dataset.harvest.remote_url == data["url"]
463
- assert not hasattr(dataset.harvest, "ckan_source")
463
+ assert dataset.harvest.ckan_source is None
464
464
 
465
465
 
466
466
  @pytest.mark.ckan_data("ckan_url_is_a_string")
udata/migrations/2025-07-30-purge-old-harvest-dynamic-fields.py ADDED
@@ -0,0 +1,29 @@
1
+ """
2
+ This migration removes legacy harvest dynamic fields
3
+ """
4
+
5
+ import logging
6
+
7
+ from mongoengine.connection import get_db
8
+
9
+ log = logging.getLogger(__name__)
10
+
11
+
12
+ def migrate(db):
13
+ # Remove legacy fields (`ods_has_records`, `ods_url`, ...) from old harvested datasets and resources
14
+ dataset_legacy_fields = ["ods_has_records", "ods_url", "ods_geo"]
15
+ for field in dataset_legacy_fields:
16
+ result = get_db().dataset.update_many({}, {"$unset": {f"harvest.{field}": 1}})
17
+ log.info(
18
+ f"Harvest Dataset dynamic legacy fields ({field}) removed from {result.modified_count} objects"
19
+ )
20
+
21
+ resource_legacy_fields = ["ods_type"]
22
+ for field in resource_legacy_fields:
23
+ result = get_db().dataset.update_many(
24
+ {"resources": {"$exists": True, "$type": "array"}},
25
+ {"$unset": {f"resources.$[].harvest.{field}": 1}},
26
+ )
27
+ log.info(
28
+ f"Harvest Resource dynamic legacy fields ({field}) removed from {result.modified_count} objects"
29
+ )