udata 10.8.2.dev36980__py2.py3-none-any.whl → 10.8.2.dev37076__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of udata might be problematic. Click here for more details.
- udata/commands/db.py +22 -9
- udata/core/dataset/models.py +5 -3
- udata/harvest/tests/ckan/test_ckan_backend.py +1 -1
- udata/migrations/2025-07-30-purge-old-harvest-dynamic-fields.py +29 -0
- udata/static/chunks/{10.471164b2a9fe15614797.js → 10.8ca60413647062717b1e.js} +3 -3
- udata/static/chunks/{10.471164b2a9fe15614797.js.map → 10.8ca60413647062717b1e.js.map} +1 -1
- udata/static/chunks/{11.55ab79044cda0271b595.js → 11.b6f741fcc366abfad9c4.js} +3 -3
- udata/static/chunks/{11.55ab79044cda0271b595.js.map → 11.b6f741fcc366abfad9c4.js.map} +1 -1
- udata/static/chunks/{13.f29411b06be1883356a3.js → 13.2d06442dd9a05d9777b5.js} +2 -2
- udata/static/chunks/{13.f29411b06be1883356a3.js.map → 13.2d06442dd9a05d9777b5.js.map} +1 -1
- udata/static/chunks/{17.3bd0340930d4a314ce9c.js → 17.e8e4caaad5cb0cc0bacc.js} +2 -2
- udata/static/chunks/{17.3bd0340930d4a314ce9c.js.map → 17.e8e4caaad5cb0cc0bacc.js.map} +1 -1
- udata/static/chunks/{19.3e0e8651d948e04b8cf2.js → 19.f03a102365af4315f9db.js} +3 -3
- udata/static/chunks/{19.3e0e8651d948e04b8cf2.js.map → 19.f03a102365af4315f9db.js.map} +1 -1
- udata/static/chunks/{8.494b003a94383b142c18.js → 8.778091d55cd8ea39af6b.js} +2 -2
- udata/static/chunks/{8.494b003a94383b142c18.js.map → 8.778091d55cd8ea39af6b.js.map} +1 -1
- udata/static/chunks/{9.07515e5187f475bce828.js → 9.033d7e190ca9e226a5d0.js} +3 -3
- udata/static/chunks/{9.07515e5187f475bce828.js.map → 9.033d7e190ca9e226a5d0.js.map} +1 -1
- udata/static/common.js +1 -1
- udata/static/common.js.map +1 -1
- udata/tests/api/test_datasets_api.py +0 -46
- udata/tests/cli/test_db_cli.py +12 -0
- udata/tests/dataset/test_dataset_model.py +0 -16
- {udata-10.8.2.dev36980.dist-info → udata-10.8.2.dev37076.dist-info}/METADATA +3 -1
- {udata-10.8.2.dev36980.dist-info → udata-10.8.2.dev37076.dist-info}/RECORD +29 -29
- udata/harvest/backends/ckan/models.py +0 -10
- {udata-10.8.2.dev36980.dist-info → udata-10.8.2.dev37076.dist-info}/LICENSE +0 -0
- {udata-10.8.2.dev36980.dist-info → udata-10.8.2.dev37076.dist-info}/WHEEL +0 -0
- {udata-10.8.2.dev36980.dist-info → udata-10.8.2.dev37076.dist-info}/entry_points.txt +0 -0
- {udata-10.8.2.dev36980.dist-info → udata-10.8.2.dev37076.dist-info}/top_level.txt +0 -0
udata/commands/db.py
CHANGED
|
@@ -2,6 +2,7 @@ import collections
|
|
|
2
2
|
import copy
|
|
3
3
|
import logging
|
|
4
4
|
import os
|
|
5
|
+
import sys
|
|
5
6
|
import traceback
|
|
6
7
|
from itertools import groupby
|
|
7
8
|
from typing import Optional
|
|
@@ -312,15 +313,26 @@ def check_references(models_to_check):
|
|
|
312
313
|
f"\t{model.__name__}#{obj.id} have a broken reference for `{reference['name']}`"
|
|
313
314
|
)
|
|
314
315
|
elif reference["type"] == "list":
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
316
|
+
field_exists = (
|
|
317
|
+
f"{reference['name']}__exists" # Eg: "contact_points__exists"
|
|
318
|
+
)
|
|
319
|
+
if model.objects(id=obj.id, **{field_exists: True}).count() == 0:
|
|
320
|
+
# See https://github.com/MongoEngine/mongoengine/issues/267#issuecomment-283065318
|
|
321
|
+
# Setting it explicitly to an empty list actually removes the field; it shouldn't.
|
|
322
|
+
errors[model][key] += 1
|
|
323
|
+
print_and_save(
|
|
324
|
+
f"\t{model.__name__}#{obj.id} have a non existing field `{reference['name']}`, instead of an empty list"
|
|
325
|
+
)
|
|
326
|
+
else:
|
|
327
|
+
attr_list = getattr(obj, reference["name"])
|
|
328
|
+
for i, sub in enumerate(attr_list):
|
|
329
|
+
# If it's still an instance of DBRef it means that it failed to
|
|
330
|
+
# dereference the ID.
|
|
331
|
+
if isinstance(sub, DBRef):
|
|
332
|
+
errors[model][key] += 1
|
|
333
|
+
print_and_save(
|
|
334
|
+
f"\t{model.__name__}#{obj.id} have a broken reference for {reference['name']}[{i}]"
|
|
335
|
+
)
|
|
324
336
|
elif reference["type"] == "embed_list":
|
|
325
337
|
p1, p2 = reference["name"].split("__")
|
|
326
338
|
attr_list = getattr(obj, p1, [])
|
|
@@ -380,6 +392,7 @@ def check_references(models_to_check):
|
|
|
380
392
|
sentry_sdk.capture_message(f"{total} integrity errors", "fatal")
|
|
381
393
|
except ImportError:
|
|
382
394
|
print("`sentry_sdk` not installed. The errors weren't reported")
|
|
395
|
+
sys.exit(1)
|
|
383
396
|
|
|
384
397
|
|
|
385
398
|
@grp.command()
|
udata/core/dataset/models.py
CHANGED
|
@@ -10,7 +10,6 @@ import requests
|
|
|
10
10
|
from blinker import signal
|
|
11
11
|
from dateutil.parser import parse as parse_dt
|
|
12
12
|
from flask import current_app, url_for
|
|
13
|
-
from mongoengine import DynamicEmbeddedDocument
|
|
14
13
|
from mongoengine import ValidationError as MongoEngineValidationError
|
|
15
14
|
from mongoengine.fields import DateTimeField
|
|
16
15
|
from mongoengine.signals import post_save, pre_init, pre_save
|
|
@@ -78,7 +77,7 @@ def get_json_ld_extra(key, value):
|
|
|
78
77
|
}
|
|
79
78
|
|
|
80
79
|
|
|
81
|
-
class HarvestDatasetMetadata(DynamicEmbeddedDocument):
|
|
80
|
+
class HarvestDatasetMetadata(db.EmbeddedDocument):
|
|
82
81
|
backend = db.StringField()
|
|
83
82
|
created_at = db.DateTimeField()
|
|
84
83
|
modified_at = db.DateTimeField()
|
|
@@ -91,12 +90,15 @@ class HarvestDatasetMetadata(DynamicEmbeddedDocument):
|
|
|
91
90
|
dct_identifier = db.StringField()
|
|
92
91
|
archived_at = db.DateTimeField()
|
|
93
92
|
archived = db.StringField()
|
|
93
|
+
ckan_name = db.StringField()
|
|
94
|
+
ckan_source = db.StringField()
|
|
94
95
|
|
|
95
96
|
|
|
96
|
-
class HarvestResourceMetadata(DynamicEmbeddedDocument):
|
|
97
|
+
class HarvestResourceMetadata(db.EmbeddedDocument):
|
|
97
98
|
created_at = db.DateTimeField()
|
|
98
99
|
modified_at = db.DateTimeField()
|
|
99
100
|
uri = db.StringField()
|
|
101
|
+
dct_identifier = db.StringField()
|
|
100
102
|
|
|
101
103
|
|
|
102
104
|
class Schema(db.EmbeddedDocument):
|
|
@@ -460,7 +460,7 @@ def test_skip_no_resources(source, result):
|
|
|
460
460
|
def test_ckan_url_is_url(data, result):
|
|
461
461
|
dataset = dataset_for(result)
|
|
462
462
|
assert dataset.harvest.remote_url == data["url"]
|
|
463
|
-
assert
|
|
463
|
+
assert dataset.harvest.ckan_source is None
|
|
464
464
|
|
|
465
465
|
|
|
466
466
|
@pytest.mark.ckan_data("ckan_url_is_a_string")
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
"""
|
|
2
|
+
This migration removes legacy harvest dynamic fields
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
|
|
7
|
+
from mongoengine.connection import get_db
|
|
8
|
+
|
|
9
|
+
log = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def migrate(db):
|
|
13
|
+
# Remove legacy fields (`ods_has_records`, `ods_url`, ...) from old harvested datasets and resources
|
|
14
|
+
dataset_legacy_fields = ["ods_has_records", "ods_url", "ods_geo"]
|
|
15
|
+
for field in dataset_legacy_fields:
|
|
16
|
+
result = get_db().dataset.update_many({}, {"$unset": {f"harvest.{field}": 1}})
|
|
17
|
+
log.info(
|
|
18
|
+
f"Harvest Dataset dynamic legacy fields ({field}) removed from {result.modified_count} objects"
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
resource_legacy_fields = ["ods_type"]
|
|
22
|
+
for field in resource_legacy_fields:
|
|
23
|
+
result = get_db().dataset.update_many(
|
|
24
|
+
{"resources": {"$exists": True, "$type": "array"}},
|
|
25
|
+
{"$unset": {f"resources.$[].harvest.{field}": 1}},
|
|
26
|
+
)
|
|
27
|
+
log.info(
|
|
28
|
+
f"Harvest Resource dynamic legacy fields ({field}) removed from {result.modified_count} objects"
|
|
29
|
+
)
|