udata 8.0.2.dev29304__py2.py3-none-any.whl → 9.1.0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of udata might be problematic. Click here for more details.
- udata/__init__.py +1 -1
- udata/api/__init__.py +2 -0
- udata/api/commands.py +0 -2
- udata/api_fields.py +41 -3
- udata/commands/db.py +88 -48
- udata/core/dataservices/factories.py +33 -0
- udata/core/dataservices/models.py +42 -4
- udata/core/dataservices/rdf.py +106 -0
- udata/core/dataset/csv.py +8 -1
- udata/core/dataset/models.py +1 -2
- udata/core/dataset/rdf.py +37 -128
- udata/core/discussions/models.py +20 -0
- udata/core/organization/csv.py +5 -3
- udata/core/reports/__init__.py +0 -0
- udata/core/reports/api.py +44 -0
- udata/core/reports/constants.py +30 -0
- udata/core/reports/models.py +58 -0
- udata/core/reuse/csv.py +3 -0
- udata/core/site/api.py +33 -2
- udata/core/site/rdf.py +6 -1
- udata/core/spam/models.py +6 -0
- udata/core/topic/models.py +3 -2
- udata/core/topic/parsers.py +3 -2
- udata/core/user/apiv2.py +28 -0
- udata/db/__init__.py +0 -0
- udata/db/tasks.py +6 -0
- udata/features/notifications/__init__.py +0 -1
- udata/forms/fields.py +2 -2
- udata/harvest/api.py +19 -1
- udata/harvest/backends/base.py +118 -10
- udata/harvest/backends/dcat.py +28 -7
- udata/harvest/models.py +6 -0
- udata/harvest/tests/dcat/bnodes.xml +13 -2
- udata/harvest/tests/test_dcat_backend.py +21 -0
- udata/migrations/2024-06-11-fix-reuse-datasets-references.py +35 -0
- udata/models/__init__.py +1 -0
- udata/rdf.py +113 -2
- udata/routing.py +1 -1
- udata/settings.py +3 -1
- udata/static/admin.js +17 -17
- udata/static/admin.js.map +1 -1
- udata/static/chunks/{18.ad41fb75ac4226e1f3ce.js → 18.1922fd0b2b7fad122991.js} +3 -3
- udata/static/chunks/18.1922fd0b2b7fad122991.js.map +1 -0
- udata/static/chunks/{7.11ac4de064ae59691d49.js → 7.e2106342e94ee09393b1.js} +2 -2
- udata/static/chunks/7.e2106342e94ee09393b1.js.map +1 -0
- udata/static/common.js +1 -1
- udata/static/common.js.map +1 -1
- udata/storage/s3.py +3 -3
- udata/tasks.py +1 -0
- udata/tests/api/test_dataservices_api.py +26 -2
- udata/tests/api/test_datasets_api.py +1 -1
- udata/tests/api/test_reports_api.py +87 -0
- udata/tests/apiv2/test_me_api.py +40 -0
- udata/tests/dataset/test_dataset_rdf.py +19 -1
- udata/tests/frontend/test_auth.py +1 -4
- udata/tests/organization/test_csv_adapter.py +0 -1
- udata/tests/plugin.py +2 -0
- udata/tests/site/test_site_api.py +0 -1
- udata/tests/site/test_site_rdf.py +66 -0
- udata/tests/test_discussions.py +24 -34
- udata/tests/test_model.py +3 -2
- udata/tests/test_utils.py +1 -1
- udata/translations/ar/LC_MESSAGES/udata.mo +0 -0
- udata/translations/ar/LC_MESSAGES/udata.po +128 -64
- udata/translations/de/LC_MESSAGES/udata.mo +0 -0
- udata/translations/de/LC_MESSAGES/udata.po +128 -64
- udata/translations/es/LC_MESSAGES/udata.mo +0 -0
- udata/translations/es/LC_MESSAGES/udata.po +128 -64
- udata/translations/fr/LC_MESSAGES/udata.mo +0 -0
- udata/translations/fr/LC_MESSAGES/udata.po +128 -64
- udata/translations/it/LC_MESSAGES/udata.mo +0 -0
- udata/translations/it/LC_MESSAGES/udata.po +128 -64
- udata/translations/pt/LC_MESSAGES/udata.mo +0 -0
- udata/translations/pt/LC_MESSAGES/udata.po +128 -64
- udata/translations/sr/LC_MESSAGES/udata.mo +0 -0
- udata/translations/sr/LC_MESSAGES/udata.po +128 -64
- udata/translations/udata.pot +129 -65
- udata/uris.py +14 -13
- {udata-8.0.2.dev29304.dist-info → udata-9.1.0.dist-info}/METADATA +26 -7
- {udata-8.0.2.dev29304.dist-info → udata-9.1.0.dist-info}/RECORD +84 -72
- udata/static/chunks/18.ad41fb75ac4226e1f3ce.js.map +0 -1
- udata/static/chunks/7.11ac4de064ae59691d49.js.map +0 -1
- {udata-8.0.2.dev29304.dist-info → udata-9.1.0.dist-info}/LICENSE +0 -0
- {udata-8.0.2.dev29304.dist-info → udata-9.1.0.dist-info}/WHEEL +0 -0
- {udata-8.0.2.dev29304.dist-info → udata-9.1.0.dist-info}/entry_points.txt +0 -0
- {udata-8.0.2.dev29304.dist-info → udata-9.1.0.dist-info}/top_level.txt +0 -0
udata/__init__.py
CHANGED
udata/api/__init__.py
CHANGED
|
@@ -323,6 +323,7 @@ def init_app(app):
|
|
|
323
323
|
import udata.core.activity.api # noqa
|
|
324
324
|
import udata.core.spatial.api # noqa
|
|
325
325
|
import udata.core.user.api # noqa
|
|
326
|
+
import udata.core.user.apiv2 # noqa
|
|
326
327
|
import udata.core.dataset.api # noqa
|
|
327
328
|
import udata.core.dataset.apiv2 # noqa
|
|
328
329
|
import udata.core.dataservices.api # noqa
|
|
@@ -333,6 +334,7 @@ def init_app(app):
|
|
|
333
334
|
import udata.core.organization.apiv2 # noqa
|
|
334
335
|
import udata.core.followers.api # noqa
|
|
335
336
|
import udata.core.jobs.api # noqa
|
|
337
|
+
import udata.core.reports.api # noqa
|
|
336
338
|
import udata.core.site.api # noqa
|
|
337
339
|
import udata.core.tags.api # noqa
|
|
338
340
|
import udata.core.topic.api # noqa
|
udata/api/commands.py
CHANGED
udata/api_fields.py
CHANGED
|
@@ -38,6 +38,9 @@ def convert_db_to_field(key, field, info = {}):
|
|
|
38
38
|
constructor = restx_fields.String
|
|
39
39
|
params['min_length'] = field.min_length
|
|
40
40
|
params['max_length'] = field.max_length
|
|
41
|
+
params['enum'] = field.choices
|
|
42
|
+
elif isinstance(field, mongo_fields.ObjectIdField):
|
|
43
|
+
constructor = restx_fields.String
|
|
41
44
|
elif isinstance(field, mongo_fields.FloatField):
|
|
42
45
|
constructor = restx_fields.Float
|
|
43
46
|
params['min'] = field.min # TODO min_value?
|
|
@@ -70,10 +73,14 @@ def convert_db_to_field(key, field, info = {}):
|
|
|
70
73
|
constructor_write = restx_fields.String
|
|
71
74
|
elif isinstance(field, mongo_fields.EmbeddedDocumentField):
|
|
72
75
|
nested_fields = info.get('nested_fields')
|
|
73
|
-
if nested_fields is None:
|
|
74
|
-
|
|
76
|
+
if nested_fields is not None:
|
|
77
|
+
constructor = lambda **kwargs: restx_fields.Nested(nested_fields, **kwargs)
|
|
78
|
+
elif hasattr(field.document_type_obj, '__read_fields__'):
|
|
79
|
+
constructor_read = lambda **kwargs: restx_fields.Nested(field.document_type_obj.__read_fields__, **kwargs)
|
|
80
|
+
constructor_write = lambda **kwargs: restx_fields.Nested(field.document_type_obj.__write_fields__, **kwargs)
|
|
81
|
+
else:
|
|
82
|
+
raise ValueError(f"EmbeddedDocumentField `{key}` requires a `nested_fields` param to serialize/deserialize or a `@generate_fields()` definition.")
|
|
75
83
|
|
|
76
|
-
constructor = lambda **kwargs: restx_fields.Nested(nested_fields, **kwargs)
|
|
77
84
|
else:
|
|
78
85
|
raise ValueError(f"Unsupported MongoEngine field type {field.__class__.__name__}")
|
|
79
86
|
|
|
@@ -96,6 +103,7 @@ def generate_fields(**kwargs):
|
|
|
96
103
|
read_fields = {}
|
|
97
104
|
write_fields = {}
|
|
98
105
|
sortables = []
|
|
106
|
+
filterables = []
|
|
99
107
|
|
|
100
108
|
read_fields['id'] = restx_fields.String(required=True)
|
|
101
109
|
|
|
@@ -106,6 +114,23 @@ def generate_fields(**kwargs):
|
|
|
106
114
|
if info.get('sortable', False):
|
|
107
115
|
sortables.append(key)
|
|
108
116
|
|
|
117
|
+
filterable = info.get('filterable', None)
|
|
118
|
+
if filterable is not None:
|
|
119
|
+
if 'key' not in filterable:
|
|
120
|
+
filterable['key'] = key
|
|
121
|
+
if 'column' not in filterable:
|
|
122
|
+
filterable['column'] = key
|
|
123
|
+
|
|
124
|
+
if 'constraints' not in filterable:
|
|
125
|
+
filterable['constraints'] = []
|
|
126
|
+
if isinstance(field, mongo_fields.ReferenceField) or (isinstance(field, mongo_fields.ListField) and isinstance(field.field, mongo_fields.ReferenceField)):
|
|
127
|
+
filterable['constraints'].append('objectid')
|
|
128
|
+
|
|
129
|
+
# We may add more information later here:
|
|
130
|
+
# - type of mongo query to execute (right now only simple =)
|
|
131
|
+
|
|
132
|
+
filterables.append(filterable)
|
|
133
|
+
|
|
109
134
|
read, write = convert_db_to_field(key, field)
|
|
110
135
|
|
|
111
136
|
if read:
|
|
@@ -159,6 +184,9 @@ def generate_fields(**kwargs):
|
|
|
159
184
|
choices = sortables + ['-' + k for k in sortables]
|
|
160
185
|
parser.add_argument('sort', type=str, location='args', choices=choices, help='The field (and direction) on which sorting apply')
|
|
161
186
|
|
|
187
|
+
for filterable in filterables:
|
|
188
|
+
parser.add_argument(filterable['key'], type=str, location='args')
|
|
189
|
+
|
|
162
190
|
cls.__index_parser__ = parser
|
|
163
191
|
def apply_sort_filters_and_pagination(base_query):
|
|
164
192
|
args = cls.__index_parser__.parse_args()
|
|
@@ -166,6 +194,16 @@ def generate_fields(**kwargs):
|
|
|
166
194
|
if sortables and args['sort']:
|
|
167
195
|
base_query = base_query.order_by(args['sort'])
|
|
168
196
|
|
|
197
|
+
for filterable in filterables:
|
|
198
|
+
if args.get(filterable['key']):
|
|
199
|
+
for constraint in filterable['constraints']:
|
|
200
|
+
if constraint == 'objectid' and not ObjectId.is_valid(args[filterable['key']]):
|
|
201
|
+
api.abort(400, f'`{filterable["key"]}` must be an identifier')
|
|
202
|
+
|
|
203
|
+
base_query = base_query.filter(**{
|
|
204
|
+
filterable['column']: args[filterable['key']],
|
|
205
|
+
})
|
|
206
|
+
|
|
169
207
|
if paginable:
|
|
170
208
|
base_query = base_query.paginate(args['page'], args['page_size'])
|
|
171
209
|
|
udata/commands/db.py
CHANGED
|
@@ -1,7 +1,10 @@
|
|
|
1
1
|
import collections
|
|
2
|
+
from itertools import groupby
|
|
2
3
|
import logging
|
|
3
4
|
import os
|
|
5
|
+
import traceback
|
|
4
6
|
|
|
7
|
+
from bson import DBRef
|
|
5
8
|
import click
|
|
6
9
|
import mongoengine
|
|
7
10
|
|
|
@@ -135,8 +138,14 @@ def display_op(op):
|
|
|
135
138
|
echo('{label:.<70} [{date}]'.format(label=label, date=timestamp))
|
|
136
139
|
format_output(op['output'], success=op['success'], traceback=op.get('traceback'))
|
|
137
140
|
|
|
138
|
-
|
|
139
141
|
def check_references(models_to_check):
|
|
142
|
+
# Cannot modify local scope from Python… :-(
|
|
143
|
+
class Log: errors = []
|
|
144
|
+
|
|
145
|
+
def print_and_save(text: str):
|
|
146
|
+
Log.errors.append(text.strip())
|
|
147
|
+
print(text)
|
|
148
|
+
|
|
140
149
|
errors = collections.defaultdict(int)
|
|
141
150
|
|
|
142
151
|
_models = []
|
|
@@ -147,7 +156,7 @@ def check_references(models_to_check):
|
|
|
147
156
|
]
|
|
148
157
|
|
|
149
158
|
references = []
|
|
150
|
-
for model in _models:
|
|
159
|
+
for model in set(_models):
|
|
151
160
|
if model.__name__ == 'Activity':
|
|
152
161
|
print(f'Skipping Activity model, scheduled for deprecation')
|
|
153
162
|
continue
|
|
@@ -240,53 +249,84 @@ def check_references(models_to_check):
|
|
|
240
249
|
print(f'- {reference["repr"]}({reference["destination"]}) — {reference["type"]}')
|
|
241
250
|
print('')
|
|
242
251
|
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
_ = sub.id
|
|
258
|
-
except mongoengine.errors.DoesNotExist:
|
|
259
|
-
errors[reference["repr"]] += 1
|
|
260
|
-
elif reference['type'] == 'embed_list':
|
|
261
|
-
p1, p2 = reference['name'].split('__')
|
|
262
|
-
for sub in getattr(obj, p1):
|
|
263
|
-
try:
|
|
264
|
-
getattr(sub, p2)
|
|
265
|
-
except mongoengine.errors.DoesNotExist:
|
|
266
|
-
errors[reference["repr"]] += 1
|
|
267
|
-
elif reference['type'] == 'embed':
|
|
268
|
-
p1, p2 = reference['name'].split('__')
|
|
269
|
-
sub = getattr(obj, p1)
|
|
270
|
-
try:
|
|
271
|
-
getattr(sub, p2)
|
|
272
|
-
except mongoengine.errors.DoesNotExist:
|
|
273
|
-
errors[reference["repr"]] += 1
|
|
274
|
-
elif reference['type'] == 'embed_list_ref':
|
|
275
|
-
p1, p2 = reference['name'].split('__')
|
|
276
|
-
sub = getattr(getattr(obj, p1), p2)
|
|
277
|
-
for obj in sub:
|
|
278
|
-
try:
|
|
279
|
-
obj.id
|
|
280
|
-
except mongoengine.errors.DoesNotExist:
|
|
281
|
-
errors[reference["repr"]] += 1
|
|
282
|
-
else:
|
|
283
|
-
print(f'Unknown ref type {reference["type"]}')
|
|
284
|
-
print('Errors:', errors[reference["repr"]])
|
|
285
|
-
except mongoengine.errors.FieldDoesNotExist as e:
|
|
286
|
-
print('[ERROR]', e)
|
|
287
|
-
|
|
288
|
-
print(f'\n Total errors: {sum(errors.values())}')
|
|
252
|
+
total = 0
|
|
253
|
+
for model, model_references in groupby(references, lambda i: i["model"]):
|
|
254
|
+
model_references = list(model_references)
|
|
255
|
+
count = model.objects.count()
|
|
256
|
+
print(f'- doing {count} {model.__name__}…')
|
|
257
|
+
errors[model] = {}
|
|
258
|
+
|
|
259
|
+
qs = model.objects().no_cache().all()
|
|
260
|
+
with click.progressbar(qs, length=count) as models:
|
|
261
|
+
for obj in models:
|
|
262
|
+
for reference in model_references:
|
|
263
|
+
key = f'\t- {reference["repr"]}({reference["destination"]}) — {reference["type"]}…'
|
|
264
|
+
if key not in errors[model]:
|
|
265
|
+
errors[model][key] = 0
|
|
289
266
|
|
|
267
|
+
try:
|
|
268
|
+
if reference['type'] == 'direct':
|
|
269
|
+
try:
|
|
270
|
+
_ = getattr(obj, reference['name'])
|
|
271
|
+
except mongoengine.errors.DoesNotExist:
|
|
272
|
+
errors[model][key] += 1
|
|
273
|
+
print_and_save(f'\t{model.__name__}#{obj.id} have a broken reference for `{reference["name"]}`')
|
|
274
|
+
elif reference['type'] == 'list':
|
|
275
|
+
attr_list = getattr(obj, reference['name'], [])
|
|
276
|
+
for i, sub in enumerate(attr_list):
|
|
277
|
+
# If it's still an instance of DBRef it means that it failed to
|
|
278
|
+
# dereference the ID.
|
|
279
|
+
if isinstance(sub, DBRef):
|
|
280
|
+
errors[model][key] += 1
|
|
281
|
+
print_and_save(f'\t{model.__name__}#{obj.id} have a broken reference for {reference["name"]}[{i}]')
|
|
282
|
+
elif reference['type'] == 'embed_list':
|
|
283
|
+
p1, p2 = reference['name'].split('__')
|
|
284
|
+
attr_list = getattr(obj, p1, [])
|
|
285
|
+
for i, sub in enumerate(attr_list):
|
|
286
|
+
try:
|
|
287
|
+
getattr(sub, p2)
|
|
288
|
+
except mongoengine.errors.DoesNotExist:
|
|
289
|
+
errors[model][key] += 1
|
|
290
|
+
print_and_save(f'\t{model.__name__}#{obj.id} have a broken reference for {p1}[{i}].{p2}')
|
|
291
|
+
elif reference['type'] == 'embed':
|
|
292
|
+
p1, p2 = reference['name'].split('__')
|
|
293
|
+
sub = getattr(obj, p1)
|
|
294
|
+
if sub is None: continue
|
|
295
|
+
try:
|
|
296
|
+
getattr(sub, p2)
|
|
297
|
+
except mongoengine.errors.DoesNotExist:
|
|
298
|
+
errors[model][key] += 1
|
|
299
|
+
print_and_save(f'\t{model.__name__}#{obj.id} have a broken reference for {p1}.{p2}')
|
|
300
|
+
elif reference['type'] == 'embed_list_ref':
|
|
301
|
+
p1, p2 = reference['name'].split('__')
|
|
302
|
+
a = getattr(obj, p1)
|
|
303
|
+
if a is None: continue
|
|
304
|
+
sub = getattr(a, p2, [])
|
|
305
|
+
for i, child in enumerate(sub):
|
|
306
|
+
# If it's still an instance of DBRef it means that it failed to
|
|
307
|
+
# dereference the ID.
|
|
308
|
+
if isinstance(child, DBRef):
|
|
309
|
+
errors[model][key] += 1
|
|
310
|
+
print_and_save(f'\t{model.__name__}#{obj.id} have a broken reference for {p1}.{p2}[{i}]')
|
|
311
|
+
else:
|
|
312
|
+
print_and_save(f'Unknown ref type {reference["type"]}')
|
|
313
|
+
except mongoengine.errors.FieldDoesNotExist as e:
|
|
314
|
+
print_and_save(f'[ERROR for {model.__name__} {obj.id}] {traceback.format_exc()}')
|
|
315
|
+
|
|
316
|
+
for key, nb_errors in errors[model].items():
|
|
317
|
+
print(f'{key}: {nb_errors}')
|
|
318
|
+
total += nb_errors
|
|
319
|
+
|
|
320
|
+
print(f'\n Total errors: {total}')
|
|
321
|
+
|
|
322
|
+
if total > 0:
|
|
323
|
+
try:
|
|
324
|
+
import sentry_sdk
|
|
325
|
+
with sentry_sdk.push_scope() as scope:
|
|
326
|
+
scope.set_extra("errors", Log.errors)
|
|
327
|
+
sentry_sdk.capture_message(f"{total} integrity errors", "fatal")
|
|
328
|
+
except ImportError:
|
|
329
|
+
print("`sentry_sdk` not installed. The errors weren't reported")
|
|
290
330
|
|
|
291
331
|
@grp.command()
|
|
292
332
|
@click.option('--models', multiple=True, default=[], help='Model(s) to check')
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import factory
|
|
2
|
+
|
|
3
|
+
from udata.core.dataservices.models import Dataservice, HarvestMetadata
|
|
4
|
+
from udata.core.organization.factories import OrganizationFactory
|
|
5
|
+
from udata.factories import ModelFactory
|
|
6
|
+
|
|
7
|
+
class HarvestMetadataFactory(ModelFactory):
|
|
8
|
+
class Meta:
|
|
9
|
+
model = HarvestMetadata
|
|
10
|
+
|
|
11
|
+
backend = 'csw-dcat'
|
|
12
|
+
domain = 'data.gouv.fr'
|
|
13
|
+
|
|
14
|
+
source_id = factory.Faker('unique_string')
|
|
15
|
+
source_url = factory.Faker('url')
|
|
16
|
+
|
|
17
|
+
remote_id = factory.Faker('unique_string')
|
|
18
|
+
remote_url = factory.Faker('url')
|
|
19
|
+
|
|
20
|
+
uri = factory.Faker('url')
|
|
21
|
+
|
|
22
|
+
class DataserviceFactory(ModelFactory):
|
|
23
|
+
class Meta:
|
|
24
|
+
model = Dataservice
|
|
25
|
+
|
|
26
|
+
title = factory.Faker('sentence')
|
|
27
|
+
description = factory.Faker('text')
|
|
28
|
+
base_api_url = factory.Faker('url')
|
|
29
|
+
|
|
30
|
+
class Params:
|
|
31
|
+
org = factory.Trait(
|
|
32
|
+
organization=factory.SubFactory(OrganizationFactory),
|
|
33
|
+
)
|
|
@@ -31,6 +31,35 @@ class DataserviceQuerySet(OwnedQuerySet):
|
|
|
31
31
|
db.Q(deleted_at__ne=None) |
|
|
32
32
|
db.Q(archived_at__ne=None))
|
|
33
33
|
|
|
34
|
+
@generate_fields()
|
|
35
|
+
class HarvestMetadata(db.EmbeddedDocument):
|
|
36
|
+
backend = field(db.StringField())
|
|
37
|
+
domain = field(db.StringField())
|
|
38
|
+
|
|
39
|
+
source_id = field(db.StringField())
|
|
40
|
+
source_url = field(db.URLField())
|
|
41
|
+
|
|
42
|
+
remote_id = field(db.StringField())
|
|
43
|
+
remote_url = field(db.URLField())
|
|
44
|
+
|
|
45
|
+
# If the node ID is a `URIRef` it means it links to something external, if it's not an `URIRef` it's often an
|
|
46
|
+
# auto-generated ID just to link multiple RDF nodes together. When exporting as RDF to other catalogs, we
|
|
47
|
+
# want to re-use this node ID (only if it's not auto-generated) to improve compatibility.
|
|
48
|
+
uri = field(
|
|
49
|
+
db.URLField(),
|
|
50
|
+
description="RDF node ID if it's an `URIRef`. `None` if it's not present or if it's a random auto-generated ID inside the graph.",
|
|
51
|
+
)
|
|
52
|
+
|
|
53
|
+
created_at = field(
|
|
54
|
+
db.DateTimeField(),
|
|
55
|
+
description="Date of the creation as provided by the harvested catalog"
|
|
56
|
+
)
|
|
57
|
+
last_update = field(
|
|
58
|
+
db.DateTimeField(),
|
|
59
|
+
description="Date of the last harvesting"
|
|
60
|
+
)
|
|
61
|
+
archived_at = field(db.DateTimeField())
|
|
62
|
+
|
|
34
63
|
@generate_fields()
|
|
35
64
|
class Dataservice(WithMetrics, Owned, db.Document):
|
|
36
65
|
meta = {
|
|
@@ -111,17 +140,26 @@ class Dataservice(WithMetrics, Owned, db.Document):
|
|
|
111
140
|
db.ListField(
|
|
112
141
|
field(
|
|
113
142
|
db.ReferenceField(Dataset),
|
|
114
|
-
nested_fields=datasets_api_fields.
|
|
143
|
+
nested_fields=datasets_api_fields.dataset_ref_fields,
|
|
115
144
|
)
|
|
116
|
-
)
|
|
145
|
+
),
|
|
146
|
+
filterable={
|
|
147
|
+
'key': 'dataset',
|
|
148
|
+
},
|
|
149
|
+
)
|
|
150
|
+
|
|
151
|
+
harvest = field(
|
|
152
|
+
db.EmbeddedDocumentField(HarvestMetadata),
|
|
153
|
+
readonly=True,
|
|
117
154
|
)
|
|
118
155
|
|
|
119
156
|
@function_field(description="Link to the API endpoint for this dataservice")
|
|
120
157
|
def self_api_url(self):
|
|
121
158
|
return endpoint_for('api.dataservice', dataservice=self, _external=True)
|
|
122
159
|
|
|
123
|
-
|
|
124
|
-
|
|
160
|
+
@function_field(description="Link to the udata web page for this dataservice")
|
|
161
|
+
def self_web_url(self):
|
|
162
|
+
return endpoint_for('dataservices.show', dataservice=self, _external=True)
|
|
125
163
|
|
|
126
164
|
# TODO
|
|
127
165
|
# frequency = db.StringField(choices=list(UPDATE_FREQUENCIES.keys()))
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
|
|
2
|
+
from rdflib import RDF, BNode, Graph, Literal, URIRef
|
|
3
|
+
|
|
4
|
+
from udata.core.dataservices.models import Dataservice, HarvestMetadata as HarvestDataserviceMetadata
|
|
5
|
+
from udata.core.dataset.models import Dataset, License
|
|
6
|
+
from udata.core.dataset.rdf import dataset_to_graph_id, sanitize_html
|
|
7
|
+
from udata.rdf import namespace_manager, DCAT, DCT, contact_point_from_rdf, rdf_value, remote_url_from_rdf, themes_from_rdf, url_from_rdf
|
|
8
|
+
from udata.uris import endpoint_for
|
|
9
|
+
|
|
10
|
+
def dataservice_from_rdf(graph: Graph, dataservice: Dataservice, node, all_datasets: list[Dataset]) -> Dataservice :
|
|
11
|
+
'''
|
|
12
|
+
Create or update a dataservice from an RDF/DCAT graph
|
|
13
|
+
'''
|
|
14
|
+
if node is None: # Assume first match is the only match
|
|
15
|
+
node = graph.value(predicate=RDF.type, object=DCAT.DataService)
|
|
16
|
+
|
|
17
|
+
d = graph.resource(node)
|
|
18
|
+
|
|
19
|
+
dataservice.title = rdf_value(d, DCT.title)
|
|
20
|
+
dataservice.description = sanitize_html(d.value(DCT.description) or d.value(DCT.abstract))
|
|
21
|
+
|
|
22
|
+
dataservice.base_api_url = url_from_rdf(d, DCAT.endpointURL)
|
|
23
|
+
dataservice.endpoint_description_url = url_from_rdf(d, DCAT.endpointDescription)
|
|
24
|
+
|
|
25
|
+
dataservice.contact_point = contact_point_from_rdf(d, dataservice) or dataservice.contact_point
|
|
26
|
+
|
|
27
|
+
datasets = []
|
|
28
|
+
for dataset_node in d.objects(DCAT.servesDataset):
|
|
29
|
+
id = dataset_node.value(DCT.identifier)
|
|
30
|
+
dataset = next((d for d in all_datasets if d is not None and d.harvest.remote_id == id), None)
|
|
31
|
+
|
|
32
|
+
if dataset is None:
|
|
33
|
+
# We try with `endswith` because Europe XSLT have problems with IDs. Sometimes they are prefixed with the domain of the catalog, sometimes not.
|
|
34
|
+
dataset = next((d for d in all_datasets if d is not None and d.harvest.remote_id.endswith(id)), None)
|
|
35
|
+
|
|
36
|
+
if dataset is not None:
|
|
37
|
+
datasets.append(dataset.id)
|
|
38
|
+
|
|
39
|
+
if datasets:
|
|
40
|
+
dataservice.datasets = datasets
|
|
41
|
+
|
|
42
|
+
license = rdf_value(d, DCT.license)
|
|
43
|
+
if license is not None:
|
|
44
|
+
dataservice.license = License.guess(license)
|
|
45
|
+
|
|
46
|
+
if not dataservice.harvest:
|
|
47
|
+
dataservice.harvest = HarvestDataserviceMetadata()
|
|
48
|
+
|
|
49
|
+
dataservice.harvest.uri = d.identifier.toPython() if isinstance(d.identifier, URIRef) else None
|
|
50
|
+
dataservice.harvest.remote_url = remote_url_from_rdf(d)
|
|
51
|
+
dataservice.harvest.created_at = rdf_value(d, DCT.issued)
|
|
52
|
+
dataservice.metadata_modified_at = rdf_value(d, DCT.modified)
|
|
53
|
+
|
|
54
|
+
dataservice.tags = themes_from_rdf(d)
|
|
55
|
+
|
|
56
|
+
return dataservice
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def dataservice_to_rdf(dataservice: Dataservice, graph=None):
|
|
60
|
+
'''
|
|
61
|
+
Map a dataservice domain model to a DCAT/RDF graph
|
|
62
|
+
'''
|
|
63
|
+
# Use the unlocalized permalink to the dataservice as URI when available
|
|
64
|
+
# unless there is already an upstream URI
|
|
65
|
+
if dataservice.harvest and dataservice.harvest.uri:
|
|
66
|
+
id = URIRef(dataservice.harvest.uri)
|
|
67
|
+
elif dataservice.id:
|
|
68
|
+
id = URIRef(endpoint_for('dataservices.show_redirect', 'api.dataservice',
|
|
69
|
+
dataservice=dataservice.id, _external=True))
|
|
70
|
+
else:
|
|
71
|
+
# Should not happen in production. Some tests only
|
|
72
|
+
# `build()` a dataservice without saving it to the DB.
|
|
73
|
+
id = BNode()
|
|
74
|
+
|
|
75
|
+
# Expose upstream identifier if present
|
|
76
|
+
if dataservice.harvest:
|
|
77
|
+
identifier = dataservice.harvest.remote_id
|
|
78
|
+
else:
|
|
79
|
+
identifier = dataservice.id
|
|
80
|
+
graph = graph or Graph(namespace_manager=namespace_manager)
|
|
81
|
+
|
|
82
|
+
d = graph.resource(id)
|
|
83
|
+
d.set(RDF.type, DCAT.DataService)
|
|
84
|
+
d.set(DCT.identifier, Literal(identifier))
|
|
85
|
+
d.set(DCT.title, Literal(dataservice.title))
|
|
86
|
+
d.set(DCT.description, Literal(dataservice.description))
|
|
87
|
+
d.set(DCT.issued, Literal(dataservice.created_at))
|
|
88
|
+
|
|
89
|
+
if dataservice.base_api_url:
|
|
90
|
+
d.set(DCAT.endpointURL, Literal(dataservice.base_api_url))
|
|
91
|
+
|
|
92
|
+
if dataservice.endpoint_description_url:
|
|
93
|
+
d.set(DCAT.endpointDescription, Literal(dataservice.endpoint_description_url))
|
|
94
|
+
|
|
95
|
+
for tag in dataservice.tags:
|
|
96
|
+
d.add(DCAT.keyword, Literal(tag))
|
|
97
|
+
|
|
98
|
+
# `dataset_to_graph_id(dataset)` URIRef may not exist in the current page
|
|
99
|
+
# but should exist in the catalog somewhere. Maybe we should create a Node
|
|
100
|
+
# with some basic information about this dataset (but this will return a page
|
|
101
|
+
# with more datasets than the page size… and could be problematic when processing the
|
|
102
|
+
# correct Node with all the information in a future page)
|
|
103
|
+
for dataset in dataservice.datasets:
|
|
104
|
+
d.add(DCAT.servesDataset, dataset_to_graph_id(dataset))
|
|
105
|
+
|
|
106
|
+
return d
|
udata/core/dataset/csv.py
CHANGED
|
@@ -19,6 +19,9 @@ class DatasetCsvAdapter(csv.Adapter):
|
|
|
19
19
|
('url', 'external_url'),
|
|
20
20
|
('organization', 'organization.name'),
|
|
21
21
|
('organization_id', 'organization.id'),
|
|
22
|
+
('owner', 'owner.slug'), # in case it's owned by a user, or introduce 'owner_type'?
|
|
23
|
+
('owner_id', 'owner.id'),
|
|
24
|
+
# 'contact_point', # ?
|
|
22
25
|
'description',
|
|
23
26
|
'frequency',
|
|
24
27
|
'license',
|
|
@@ -26,19 +29,20 @@ class DatasetCsvAdapter(csv.Adapter):
|
|
|
26
29
|
'temporal_coverage.end',
|
|
27
30
|
'spatial.granularity',
|
|
28
31
|
('spatial.zones', serialize_spatial_zones),
|
|
29
|
-
'private',
|
|
30
32
|
('featured', lambda o: o.featured or False),
|
|
31
33
|
'created_at',
|
|
32
34
|
'last_modified',
|
|
33
35
|
('tags', lambda o: ','.join(o.tags)),
|
|
34
36
|
('archived', lambda o: o.archived or False),
|
|
35
37
|
('resources_count', lambda o: len(o.resources)),
|
|
38
|
+
('main_resources_count', lambda o: len([r for r in o.resources if r.type == 'main'])),
|
|
36
39
|
'downloads',
|
|
37
40
|
('harvest.backend', lambda r: r.harvest and r.harvest.backend),
|
|
38
41
|
('harvest.domain', lambda r: r.harvest and r.harvest.domain),
|
|
39
42
|
('harvest.created_at', lambda r: r.harvest and r.harvest.created_at),
|
|
40
43
|
('harvest.modified_at', lambda r: r.harvest and r.harvest.modified_at),
|
|
41
44
|
('quality_score', lambda o: format(o.quality['score'], '.2f')),
|
|
45
|
+
# schema? what is the schema of a dataset?
|
|
42
46
|
)
|
|
43
47
|
|
|
44
48
|
def dynamic_fields(self):
|
|
@@ -85,6 +89,9 @@ class ResourcesCsvAdapter(csv.NestedAdapter):
|
|
|
85
89
|
('downloads', lambda o: int(o.metrics.get('views', 0))),
|
|
86
90
|
('harvest.created_at', lambda o: o.harvest and o.harvest.created_at),
|
|
87
91
|
('harvest.modified_at', lambda o: o.harvest and o.harvest.modified_at),
|
|
92
|
+
('schema_name', 'schema.name'),
|
|
93
|
+
('schema_version', 'schema.version'),
|
|
94
|
+
('preview_url', lambda o: o.preview_url or False),
|
|
88
95
|
)
|
|
89
96
|
attribute = 'resources'
|
|
90
97
|
|
udata/core/dataset/models.py
CHANGED
|
@@ -13,7 +13,6 @@ from pydoc import locate
|
|
|
13
13
|
from stringdist import rdlevenshtein
|
|
14
14
|
from werkzeug.utils import cached_property
|
|
15
15
|
import requests
|
|
16
|
-
from typing import Optional, Tuple
|
|
17
16
|
|
|
18
17
|
from udata.app import cache
|
|
19
18
|
from udata.core import storages
|
|
@@ -964,7 +963,7 @@ class ResourceSchema(object):
|
|
|
964
963
|
def assignable_schemas():
|
|
965
964
|
return [s for s in ResourceSchema.all() if s.get('schema_type') not in NON_ASSIGNABLE_SCHEMA_TYPES]
|
|
966
965
|
|
|
967
|
-
def get_existing_schema_info_by_url(url: str) ->
|
|
966
|
+
def get_existing_schema_info_by_url(url: str) -> tuple[str, str | None] | None:
|
|
968
967
|
'''
|
|
969
968
|
Returns the name and the version if exists
|
|
970
969
|
'''
|