udata 13.0.1.dev12__py3-none-any.whl → 14.4.1.dev7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of udata might be problematic. Click here for more details.
- udata/api/__init__.py +2 -8
- udata/api_fields.py +35 -4
- udata/app.py +30 -50
- udata/auth/__init__.py +29 -6
- udata/auth/forms.py +8 -6
- udata/auth/views.py +6 -3
- udata/commands/__init__.py +2 -14
- udata/commands/db.py +13 -25
- udata/commands/info.py +0 -16
- udata/commands/serve.py +3 -11
- udata/commands/tests/test_fixtures.py +9 -9
- udata/core/access_type/api.py +1 -1
- udata/core/access_type/constants.py +12 -8
- udata/core/activity/api.py +5 -6
- udata/core/avatars/api.py +43 -0
- udata/core/avatars/test_avatar_api.py +30 -0
- udata/core/badges/tests/test_commands.py +6 -6
- udata/core/csv.py +5 -0
- udata/core/dataservices/models.py +15 -3
- udata/core/dataservices/tasks.py +7 -0
- udata/core/dataset/api.py +2 -0
- udata/core/dataset/models.py +2 -2
- udata/core/dataset/permissions.py +31 -0
- udata/core/dataset/tasks.py +50 -10
- udata/core/discussions/models.py +1 -0
- udata/core/metrics/__init__.py +0 -6
- udata/core/organization/api.py +8 -5
- udata/core/organization/mails.py +1 -1
- udata/core/organization/models.py +9 -1
- udata/core/organization/notifications.py +84 -0
- udata/core/organization/permissions.py +1 -1
- udata/core/organization/tasks.py +3 -0
- udata/core/pages/tests/test_api.py +32 -0
- udata/core/post/api.py +24 -69
- udata/core/post/models.py +84 -16
- udata/core/post/tests/test_api.py +24 -1
- udata/core/reports/api.py +18 -0
- udata/core/reports/models.py +42 -2
- udata/core/reuse/models.py +1 -1
- udata/core/reuse/tasks.py +7 -0
- udata/core/site/models.py +2 -6
- udata/core/spatial/commands.py +2 -4
- udata/core/spatial/forms.py +2 -2
- udata/core/spatial/models.py +0 -10
- udata/core/spatial/tests/test_api.py +1 -36
- udata/core/user/models.py +15 -2
- udata/cors.py +2 -5
- udata/db/migrations.py +279 -0
- udata/features/notifications/api.py +7 -18
- udata/features/notifications/models.py +56 -0
- udata/features/notifications/tasks.py +25 -0
- udata/flask_mongoengine/engine.py +0 -4
- udata/frontend/__init__.py +3 -122
- udata/frontend/markdown.py +2 -1
- udata/harvest/actions.py +24 -9
- udata/harvest/api.py +30 -22
- udata/harvest/backends/__init__.py +21 -9
- udata/harvest/backends/base.py +29 -3
- udata/harvest/backends/ckan/harvesters.py +13 -2
- udata/harvest/backends/dcat.py +3 -0
- udata/harvest/backends/maaf.py +1 -0
- udata/harvest/commands.py +39 -4
- udata/harvest/filters.py +17 -6
- udata/harvest/forms.py +9 -6
- udata/harvest/models.py +16 -0
- udata/harvest/permissions.py +27 -0
- udata/harvest/tasks.py +3 -5
- udata/harvest/tests/ckan/test_ckan_backend.py +35 -2
- udata/harvest/tests/ckan/test_ckan_backend_errors.py +1 -1
- udata/harvest/tests/ckan/test_ckan_backend_filters.py +1 -1
- udata/harvest/tests/ckan/test_dkan_backend.py +1 -1
- udata/harvest/tests/dcat/udata.xml +6 -6
- udata/harvest/tests/factories.py +1 -1
- udata/harvest/tests/test_actions.py +63 -8
- udata/harvest/tests/test_api.py +278 -123
- udata/harvest/tests/test_base_backend.py +88 -1
- udata/harvest/tests/test_dcat_backend.py +60 -13
- udata/harvest/tests/test_filters.py +6 -0
- udata/i18n.py +11 -273
- udata/mail.py +5 -1
- udata/migrations/2025-10-31-create-membership-request-notifications.py +55 -0
- udata/migrations/2025-11-13-delete-user-email-index.py +25 -0
- udata/migrations/2025-12-04-add-uuid-to-discussion-messages.py +28 -0
- udata/models/__init__.py +0 -8
- udata/mongo/slug_fields.py +1 -1
- udata/rdf.py +45 -6
- udata/routing.py +2 -10
- udata/sentry.py +4 -10
- udata/settings.py +23 -17
- udata/tasks.py +4 -3
- udata/templates/mail/message.html +5 -31
- udata/tests/__init__.py +28 -12
- udata/tests/api/__init__.py +108 -21
- udata/tests/api/test_activities_api.py +36 -0
- udata/tests/api/test_auth_api.py +121 -95
- udata/tests/api/test_base_api.py +7 -4
- udata/tests/api/test_dataservices_api.py +29 -1
- udata/tests/api/test_datasets_api.py +45 -21
- udata/tests/api/test_organizations_api.py +192 -197
- udata/tests/api/test_reports_api.py +157 -0
- udata/tests/api/test_reuses_api.py +147 -147
- udata/tests/api/test_security_api.py +12 -12
- udata/tests/api/test_swagger.py +4 -4
- udata/tests/api/test_tags_api.py +8 -8
- udata/tests/api/test_user_api.py +13 -1
- udata/tests/apiv2/test_swagger.py +4 -4
- udata/tests/apiv2/test_topics.py +1 -1
- udata/tests/cli/test_cli_base.py +8 -9
- udata/tests/dataset/test_dataset_commands.py +4 -4
- udata/tests/dataset/test_dataset_model.py +66 -26
- udata/tests/dataset/test_dataset_rdf.py +99 -5
- udata/tests/dataset/test_resource_preview.py +0 -1
- udata/tests/frontend/test_auth.py +24 -1
- udata/tests/frontend/test_csv.py +0 -3
- udata/tests/helpers.py +37 -27
- udata/tests/organization/test_notifications.py +67 -2
- udata/tests/plugin.py +6 -261
- udata/tests/site/test_site_csv_exports.py +22 -10
- udata/tests/test_activity.py +9 -9
- udata/tests/test_cors.py +1 -1
- udata/tests/test_dcat_commands.py +2 -2
- udata/tests/test_discussions.py +5 -5
- udata/tests/test_migrations.py +181 -481
- udata/tests/test_notifications.py +15 -57
- udata/tests/test_notifications_task.py +43 -0
- udata/tests/test_owned.py +81 -1
- udata/tests/test_storages.py +25 -19
- udata/tests/test_topics.py +77 -61
- udata/tests/test_uris.py +33 -0
- udata/tests/workers/test_jobs_commands.py +23 -23
- udata/translations/ar/LC_MESSAGES/udata.mo +0 -0
- udata/translations/ar/LC_MESSAGES/udata.po +187 -108
- udata/translations/de/LC_MESSAGES/udata.mo +0 -0
- udata/translations/de/LC_MESSAGES/udata.po +187 -108
- udata/translations/es/LC_MESSAGES/udata.mo +0 -0
- udata/translations/es/LC_MESSAGES/udata.po +187 -108
- udata/translations/fr/LC_MESSAGES/udata.mo +0 -0
- udata/translations/fr/LC_MESSAGES/udata.po +188 -109
- udata/translations/it/LC_MESSAGES/udata.mo +0 -0
- udata/translations/it/LC_MESSAGES/udata.po +187 -108
- udata/translations/pt/LC_MESSAGES/udata.mo +0 -0
- udata/translations/pt/LC_MESSAGES/udata.po +187 -108
- udata/translations/sr/LC_MESSAGES/udata.mo +0 -0
- udata/translations/sr/LC_MESSAGES/udata.po +187 -108
- udata/translations/udata.pot +215 -106
- udata/uris.py +0 -2
- udata/utils.py +5 -0
- udata-14.4.1.dev7.dist-info/METADATA +109 -0
- {udata-13.0.1.dev12.dist-info → udata-14.4.1.dev7.dist-info}/RECORD +153 -166
- {udata-13.0.1.dev12.dist-info → udata-14.4.1.dev7.dist-info}/entry_points.txt +3 -5
- udata/core/followers/views.py +0 -15
- udata/core/post/forms.py +0 -30
- udata/entrypoints.py +0 -93
- udata/features/identicon/__init__.py +0 -0
- udata/features/identicon/api.py +0 -13
- udata/features/identicon/backends.py +0 -131
- udata/features/identicon/tests/__init__.py +0 -0
- udata/features/identicon/tests/test_backends.py +0 -18
- udata/features/territories/__init__.py +0 -49
- udata/features/territories/api.py +0 -25
- udata/features/territories/models.py +0 -51
- udata/flask_mongoengine/json.py +0 -38
- udata/migrations/__init__.py +0 -367
- udata/templates/mail/base.html +0 -105
- udata/templates/mail/base.txt +0 -6
- udata/templates/mail/button.html +0 -3
- udata/templates/mail/layouts/1-column.html +0 -19
- udata/templates/mail/layouts/2-columns.html +0 -20
- udata/templates/mail/layouts/center-panel.html +0 -16
- udata/tests/cli/test_db_cli.py +0 -68
- udata/tests/features/territories/__init__.py +0 -20
- udata/tests/features/territories/test_territories_api.py +0 -185
- udata/tests/frontend/test_hooks.py +0 -149
- udata-13.0.1.dev12.dist-info/METADATA +0 -133
- {udata-13.0.1.dev12.dist-info → udata-14.4.1.dev7.dist-info}/WHEEL +0 -0
- {udata-13.0.1.dev12.dist-info → udata-14.4.1.dev7.dist-info}/licenses/LICENSE +0 -0
- {udata-13.0.1.dev12.dist-info → udata-14.4.1.dev7.dist-info}/top_level.txt +0 -0
udata/frontend/__init__.py
CHANGED
|
@@ -1,132 +1,13 @@
|
|
|
1
|
-
import inspect
|
|
2
1
|
import logging
|
|
3
|
-
from importlib import import_module
|
|
4
2
|
|
|
5
|
-
import pkg_resources
|
|
6
|
-
from jinja2 import pass_context
|
|
7
|
-
from markupsafe import Markup
|
|
8
|
-
|
|
9
|
-
from udata import entrypoints
|
|
10
|
-
from udata.i18n import I18nBlueprint
|
|
11
|
-
|
|
12
|
-
from .markdown import UdataCleaner
|
|
13
3
|
from .markdown import init_app as init_markdown
|
|
14
4
|
|
|
15
5
|
log = logging.getLogger(__name__)
|
|
16
6
|
|
|
17
7
|
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
_template_hooks = {}
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
@hook.app_template_global()
|
|
24
|
-
def package_version(name: str) -> str:
|
|
25
|
-
return pkg_resources.get_distribution(name).version
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
@hook.app_template_filter()
|
|
29
|
-
def avatar_placeholder(url):
|
|
30
|
-
if url:
|
|
31
|
-
return url
|
|
32
|
-
|
|
33
|
-
return ""
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
def _wrapper(func, name=None, when=None):
|
|
37
|
-
name = name or func.__name__
|
|
38
|
-
if name not in _template_hooks:
|
|
39
|
-
_template_hooks[name] = []
|
|
40
|
-
_template_hooks[name].append((func, when))
|
|
41
|
-
return func
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
def template_hook(func_or_name, when=None):
|
|
45
|
-
if callable(func_or_name):
|
|
46
|
-
return _wrapper(func_or_name)
|
|
47
|
-
elif isinstance(func_or_name, str):
|
|
48
|
-
|
|
49
|
-
def wrapper(func):
|
|
50
|
-
return _wrapper(func, func_or_name, when=when)
|
|
51
|
-
|
|
52
|
-
return wrapper
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
def has_template_hook(name):
|
|
56
|
-
return name in _template_hooks
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
class HookRenderer:
|
|
60
|
-
def __init__(self, funcs, ctx, *args, **kwargs):
|
|
61
|
-
self.funcs = funcs
|
|
62
|
-
self.ctx = ctx
|
|
63
|
-
self.args = args
|
|
64
|
-
self.kwargs = kwargs
|
|
65
|
-
|
|
66
|
-
def __html__(self):
|
|
67
|
-
return Markup(
|
|
68
|
-
"".join(
|
|
69
|
-
f(self.ctx, *self.args, **self.kwargs)
|
|
70
|
-
for f, w in self.funcs
|
|
71
|
-
if w is None or w(self.ctx)
|
|
72
|
-
)
|
|
73
|
-
)
|
|
74
|
-
|
|
75
|
-
def __iter__(self):
|
|
76
|
-
for func, when in self.funcs:
|
|
77
|
-
if when is None or when(self.ctx):
|
|
78
|
-
yield Markup(func(self.ctx, *self.args, **self.kwargs))
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
@pass_context
|
|
82
|
-
def render_template_hook(ctx, name, *args, **kwargs):
|
|
83
|
-
if not has_template_hook(name):
|
|
84
|
-
return ""
|
|
85
|
-
return HookRenderer(_template_hooks[name], ctx, *args, **kwargs)
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
@hook.app_context_processor
|
|
89
|
-
def inject_hooks():
|
|
90
|
-
return {
|
|
91
|
-
"hook": render_template_hook,
|
|
92
|
-
"has_hook": has_template_hook,
|
|
93
|
-
}
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
class SafeMarkup(Markup):
|
|
97
|
-
"""Markup object bypasses Jinja's escaping. This override allows to sanitize the resulting html."""
|
|
98
|
-
|
|
99
|
-
def __new__(cls, base, *args, **kwargs):
|
|
100
|
-
cleaner = UdataCleaner()
|
|
101
|
-
return super().__new__(cls, cleaner.clean(base), *args, **kwargs)
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
def _load_views(app, module):
|
|
105
|
-
views = module if inspect.ismodule(module) else import_module(module)
|
|
106
|
-
blueprint = getattr(views, "blueprint", None)
|
|
107
|
-
if blueprint:
|
|
108
|
-
app.register_blueprint(blueprint)
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
VIEWS = ["core.storages"]
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
def init_app(app, views=None):
|
|
115
|
-
views = views or VIEWS
|
|
8
|
+
def init_app(app):
|
|
9
|
+
from udata.core.storages.views import blueprint as storage_blueprint
|
|
116
10
|
|
|
117
11
|
init_markdown(app)
|
|
118
12
|
|
|
119
|
-
|
|
120
|
-
_load_views(app, "udata.{}.views".format(view))
|
|
121
|
-
|
|
122
|
-
# Load hook blueprint
|
|
123
|
-
app.register_blueprint(hook)
|
|
124
|
-
|
|
125
|
-
# Load all plugins views and blueprints
|
|
126
|
-
for module in entrypoints.get_enabled("udata.views", app).values():
|
|
127
|
-
_load_views(app, module)
|
|
128
|
-
|
|
129
|
-
# Load all plugins views and blueprints
|
|
130
|
-
for module in entrypoints.get_enabled("udata.front", app).values():
|
|
131
|
-
front_module = module if inspect.ismodule(module) else import_module(module)
|
|
132
|
-
front_module.init_app(app)
|
|
13
|
+
app.register_blueprint(storage_blueprint)
|
udata/frontend/markdown.py
CHANGED
|
@@ -7,8 +7,9 @@ import html2text
|
|
|
7
7
|
import mistune
|
|
8
8
|
from bleach.css_sanitizer import CSSSanitizer
|
|
9
9
|
from bleach.linkifier import LinkifyFilter
|
|
10
|
-
from flask import
|
|
10
|
+
from flask import current_app, request
|
|
11
11
|
from jinja2.filters import do_striptags, do_truncate
|
|
12
|
+
from markupsafe import Markup
|
|
12
13
|
from werkzeug.local import LocalProxy
|
|
13
14
|
|
|
14
15
|
from udata.i18n import _
|
udata/harvest/actions.py
CHANGED
|
@@ -34,11 +34,6 @@ def get_source(ident):
|
|
|
34
34
|
return HarvestSource.get(ident)
|
|
35
35
|
|
|
36
36
|
|
|
37
|
-
def list_backends():
|
|
38
|
-
"""List all available backends"""
|
|
39
|
-
return backends.get_all(current_app).values()
|
|
40
|
-
|
|
41
|
-
|
|
42
37
|
def list_sources(owner=None, deleted=False):
|
|
43
38
|
"""List all harvest sources"""
|
|
44
39
|
sources = HarvestSource.objects
|
|
@@ -177,7 +172,7 @@ def purge_jobs():
|
|
|
177
172
|
|
|
178
173
|
def run(source: HarvestSource):
|
|
179
174
|
"""Launch or resume an harvesting for a given source if none is running"""
|
|
180
|
-
cls = backends.
|
|
175
|
+
cls = backends.get_backend(source.backend)
|
|
181
176
|
backend = cls(source)
|
|
182
177
|
backend.harvest()
|
|
183
178
|
|
|
@@ -189,7 +184,7 @@ def launch(source: HarvestSource):
|
|
|
189
184
|
|
|
190
185
|
def preview(source: HarvestSource):
|
|
191
186
|
"""Preview an harvesting for a given source"""
|
|
192
|
-
cls = backends.
|
|
187
|
+
cls = backends.get_backend(source.backend)
|
|
193
188
|
max_items = current_app.config["HARVEST_PREVIEW_MAX_ITEMS"]
|
|
194
189
|
backend = cls(source, dryrun=True, max_items=max_items)
|
|
195
190
|
return backend.harvest()
|
|
@@ -226,7 +221,7 @@ def preview_from_config(
|
|
|
226
221
|
active=active,
|
|
227
222
|
autoarchive=autoarchive,
|
|
228
223
|
)
|
|
229
|
-
cls = backends.
|
|
224
|
+
cls = backends.get_backend(source.backend)
|
|
230
225
|
max_items = current_app.config["HARVEST_PREVIEW_MAX_ITEMS"]
|
|
231
226
|
backend = cls(source, dryrun=True, max_items=max_items)
|
|
232
227
|
return backend.harvest()
|
|
@@ -259,7 +254,7 @@ def schedule(
|
|
|
259
254
|
source.modify(
|
|
260
255
|
periodic_task=PeriodicTask.objects.create(
|
|
261
256
|
task="harvest",
|
|
262
|
-
name="Harvest {
|
|
257
|
+
name=f"Harvest {source.name} ({source.id})",
|
|
263
258
|
description="Periodic Harvesting",
|
|
264
259
|
enabled=True,
|
|
265
260
|
args=[str(source.id)],
|
|
@@ -322,3 +317,23 @@ def attach(domain, filename):
|
|
|
322
317
|
count += 1
|
|
323
318
|
|
|
324
319
|
return AttachResult(count, errors)
|
|
320
|
+
|
|
321
|
+
|
|
322
|
+
def detach(dataset: Dataset):
|
|
323
|
+
"""Detach a dataset from its harvest source
|
|
324
|
+
|
|
325
|
+
The dataset will be cleaned from harvested information
|
|
326
|
+
and will no longer be updated or archived by harvesting.
|
|
327
|
+
"""
|
|
328
|
+
dataset.harvest = None
|
|
329
|
+
for resource in dataset.resources:
|
|
330
|
+
resource.harvest = None
|
|
331
|
+
dataset.save()
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
def detach_all_from_source(source: HarvestSource):
|
|
335
|
+
"""Detach all datasets linked to a harvest source"""
|
|
336
|
+
datasets = Dataset.objects.filter(harvest__source_id=str(source.id))
|
|
337
|
+
for dataset in datasets:
|
|
338
|
+
detach(dataset)
|
|
339
|
+
return len(datasets)
|
udata/harvest/api.py
CHANGED
|
@@ -6,10 +6,10 @@ from udata.api import API, api, fields
|
|
|
6
6
|
from udata.auth import admin_permission
|
|
7
7
|
from udata.core.dataservices.models import Dataservice
|
|
8
8
|
from udata.core.dataset.api_fields import dataset_fields, dataset_ref_fields
|
|
9
|
-
from udata.core.dataset.permissions import OwnablePermission
|
|
10
9
|
from udata.core.organization.api_fields import org_ref_fields
|
|
11
10
|
from udata.core.organization.permissions import EditOrganizationPermission
|
|
12
11
|
from udata.core.user.api_fields import user_ref_fields
|
|
12
|
+
from udata.harvest.backends import get_enabled_backends
|
|
13
13
|
|
|
14
14
|
from . import actions
|
|
15
15
|
from .forms import HarvestSourceForm, HarvestSourceValidationForm
|
|
@@ -25,10 +25,6 @@ from .models import (
|
|
|
25
25
|
ns = api.namespace("harvest", "Harvest related operations")
|
|
26
26
|
|
|
27
27
|
|
|
28
|
-
def backends_ids():
|
|
29
|
-
return [b.name for b in actions.list_backends()]
|
|
30
|
-
|
|
31
|
-
|
|
32
28
|
error_fields = api.model(
|
|
33
29
|
"HarvestError",
|
|
34
30
|
{
|
|
@@ -58,6 +54,7 @@ item_fields = api.model(
|
|
|
58
54
|
"HarvestItem",
|
|
59
55
|
{
|
|
60
56
|
"remote_id": fields.String(description="The item remote ID to process", required=True),
|
|
57
|
+
"remote_url": fields.String(description="The item remote url (if available)"),
|
|
61
58
|
"dataset": fields.Nested(
|
|
62
59
|
dataset_ref_fields, description="The processed dataset", allow_null=True
|
|
63
60
|
),
|
|
@@ -118,6 +115,18 @@ validation_fields = api.model(
|
|
|
118
115
|
},
|
|
119
116
|
)
|
|
120
117
|
|
|
118
|
+
source_permissions_fields = api.model(
|
|
119
|
+
"HarvestSourcePermissions",
|
|
120
|
+
{
|
|
121
|
+
"edit": fields.Permission(),
|
|
122
|
+
"delete": fields.Permission(),
|
|
123
|
+
"run": fields.Permission(),
|
|
124
|
+
"preview": fields.Permission(),
|
|
125
|
+
"validate": fields.Permission(),
|
|
126
|
+
"schedule": fields.Permission(),
|
|
127
|
+
},
|
|
128
|
+
)
|
|
129
|
+
|
|
121
130
|
source_fields = api.model(
|
|
122
131
|
"HarvestSource",
|
|
123
132
|
{
|
|
@@ -126,7 +135,9 @@ source_fields = api.model(
|
|
|
126
135
|
"description": fields.Markdown(description="The source description"),
|
|
127
136
|
"url": fields.String(description="The source base URL", required=True),
|
|
128
137
|
"backend": fields.String(
|
|
129
|
-
description="The source backend",
|
|
138
|
+
description="The source backend",
|
|
139
|
+
enum=lambda: list(get_enabled_backends().keys()),
|
|
140
|
+
required=True,
|
|
130
141
|
),
|
|
131
142
|
"config": fields.Raw(description="The configuration as key-value pairs"),
|
|
132
143
|
"created_at": fields.ISODateTime(
|
|
@@ -154,6 +165,7 @@ source_fields = api.model(
|
|
|
154
165
|
"schedule": fields.String(
|
|
155
166
|
description="The source schedule (interval or cron expression)", readonly=True
|
|
156
167
|
),
|
|
168
|
+
"permissions": fields.Nested(source_permissions_fields, readonly=True),
|
|
157
169
|
},
|
|
158
170
|
)
|
|
159
171
|
|
|
@@ -314,7 +326,7 @@ class SourceAPI(API):
|
|
|
314
326
|
@api.marshal_with(source_fields)
|
|
315
327
|
def put(self, source: HarvestSource):
|
|
316
328
|
"""Update a harvest source"""
|
|
317
|
-
|
|
329
|
+
source.permissions["edit"].test()
|
|
318
330
|
form = api.validate(HarvestSourceForm, source)
|
|
319
331
|
source = actions.update_source(source, form.data)
|
|
320
332
|
return source
|
|
@@ -323,18 +335,19 @@ class SourceAPI(API):
|
|
|
323
335
|
@api.doc("delete_harvest_source")
|
|
324
336
|
@api.marshal_with(source_fields)
|
|
325
337
|
def delete(self, source: HarvestSource):
|
|
326
|
-
|
|
338
|
+
source.permissions["delete"].test()
|
|
327
339
|
return actions.delete_source(source), 204
|
|
328
340
|
|
|
329
341
|
|
|
330
342
|
@ns.route("/source/<harvest_source:source>/validate/", endpoint="validate_harvest_source")
|
|
331
343
|
class ValidateSourceAPI(API):
|
|
332
344
|
@api.doc("validate_harvest_source")
|
|
333
|
-
@api.secure
|
|
345
|
+
@api.secure
|
|
334
346
|
@api.expect(validation_fields)
|
|
335
347
|
@api.marshal_with(source_fields)
|
|
336
348
|
def post(self, source: HarvestSource):
|
|
337
349
|
"""Validate or reject an harvest source"""
|
|
350
|
+
source.permissions["validate"].test()
|
|
338
351
|
form = api.validate(HarvestSourceValidationForm)
|
|
339
352
|
if form.state.data == VALIDATION_ACCEPTED:
|
|
340
353
|
return actions.validate_source(source, form.comment.data)
|
|
@@ -355,7 +368,7 @@ class RunSourceAPI(API):
|
|
|
355
368
|
"Cannot run source manually. Please contact the platform if you need to reschedule the harvester.",
|
|
356
369
|
)
|
|
357
370
|
|
|
358
|
-
|
|
371
|
+
source.permissions["run"].test()
|
|
359
372
|
|
|
360
373
|
if source.validation.state != VALIDATION_ACCEPTED:
|
|
361
374
|
api.abort(400, "Source is not validated. Please validate the source before running.")
|
|
@@ -368,11 +381,12 @@ class RunSourceAPI(API):
|
|
|
368
381
|
@ns.route("/source/<harvest_source:source>/schedule/", endpoint="schedule_harvest_source")
|
|
369
382
|
class ScheduleSourceAPI(API):
|
|
370
383
|
@api.doc("schedule_harvest_source")
|
|
371
|
-
@api.secure
|
|
384
|
+
@api.secure
|
|
372
385
|
@api.expect((str, "A cron expression"))
|
|
373
386
|
@api.marshal_with(source_fields)
|
|
374
387
|
def post(self, source: HarvestSource):
|
|
375
388
|
"""Schedule an harvest source"""
|
|
389
|
+
source.permissions["schedule"].test()
|
|
376
390
|
# Handle both syntax: quoted and unquoted
|
|
377
391
|
try:
|
|
378
392
|
data = request.json
|
|
@@ -381,10 +395,11 @@ class ScheduleSourceAPI(API):
|
|
|
381
395
|
return actions.schedule(source, data)
|
|
382
396
|
|
|
383
397
|
@api.doc("unschedule_harvest_source")
|
|
384
|
-
@api.secure
|
|
398
|
+
@api.secure
|
|
385
399
|
@api.marshal_with(source_fields)
|
|
386
400
|
def delete(self, source: HarvestSource):
|
|
387
401
|
"""Unschedule an harvest source"""
|
|
402
|
+
source.permissions["schedule"].test()
|
|
388
403
|
return actions.unschedule(source), 204
|
|
389
404
|
|
|
390
405
|
|
|
@@ -409,6 +424,7 @@ class PreviewSourceAPI(API):
|
|
|
409
424
|
@api.marshal_with(preview_job_fields)
|
|
410
425
|
def get(self, source: HarvestSource):
|
|
411
426
|
"""Preview a single harvest source given an ID or a slug"""
|
|
427
|
+
source.permissions["preview"].test()
|
|
412
428
|
return actions.preview(source)
|
|
413
429
|
|
|
414
430
|
|
|
@@ -438,7 +454,7 @@ class JobAPI(API):
|
|
|
438
454
|
@api.expect(parser)
|
|
439
455
|
@api.marshal_with(job_fields)
|
|
440
456
|
def get(self, ident):
|
|
441
|
-
"""
|
|
457
|
+
"""Get a single job given an ID"""
|
|
442
458
|
return actions.get_job(ident)
|
|
443
459
|
|
|
444
460
|
|
|
@@ -457,15 +473,7 @@ class ListBackendsAPI(API):
|
|
|
457
473
|
"features": [f.as_dict() for f in b.features],
|
|
458
474
|
"extra_configs": [f.as_dict() for f in b.extra_configs],
|
|
459
475
|
}
|
|
460
|
-
for b in
|
|
476
|
+
for b in get_enabled_backends().values()
|
|
461
477
|
],
|
|
462
478
|
key=lambda b: b["label"],
|
|
463
479
|
)
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
@ns.route("/job_status/", endpoint="havest_job_status")
|
|
467
|
-
class ListHarvesterAPI(API):
|
|
468
|
-
@api.doc(model=[str])
|
|
469
|
-
def get(self):
|
|
470
|
-
"""List all available harvesters"""
|
|
471
|
-
return actions.list_backends()
|
|
@@ -1,17 +1,29 @@
|
|
|
1
|
-
from
|
|
1
|
+
from fnmatch import fnmatch
|
|
2
|
+
from importlib.metadata import entry_points
|
|
2
3
|
|
|
4
|
+
from flask import current_app
|
|
3
5
|
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
6
|
+
from .base import BaseBackend, HarvestExtraConfig, HarvestFeature, HarvestFilter # noqa
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def get_backend(name: str) -> type[BaseBackend] | None:
|
|
10
|
+
backend = get_enabled_backends().get(name)
|
|
7
11
|
if not backend:
|
|
8
|
-
|
|
9
|
-
raise EntrypointError(msg)
|
|
12
|
+
raise ValueError(f"Backend {name} unknown. Make sure it is declared in HARVESTER_BACKENDS.")
|
|
10
13
|
return backend
|
|
11
14
|
|
|
12
15
|
|
|
13
|
-
def
|
|
14
|
-
|
|
16
|
+
def get_all_backends() -> dict[str, type[BaseBackend]]:
|
|
17
|
+
# Note that we use the `BaseBackend.name` and not `ep.name`. The entrypoint name
|
|
18
|
+
# is not used anymore.
|
|
19
|
+
return {ep.load().name: ep.load() for ep in entry_points(group="udata.harvesters")}
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def is_backend_enabled(backend: type[BaseBackend]) -> bool:
|
|
23
|
+
return any(fnmatch(backend.name, g) for g in current_app.config["HARVESTER_BACKENDS"])
|
|
15
24
|
|
|
16
25
|
|
|
17
|
-
|
|
26
|
+
def get_enabled_backends() -> dict[str, type[BaseBackend]]:
|
|
27
|
+
return {
|
|
28
|
+
name: backend for name, backend in get_all_backends().items() if is_backend_enabled(backend)
|
|
29
|
+
}
|
udata/harvest/backends/base.py
CHANGED
|
@@ -85,8 +85,8 @@ class BaseBackend(object):
|
|
|
85
85
|
Also provides a few helpers needed on all or some backends.
|
|
86
86
|
"""
|
|
87
87
|
|
|
88
|
-
name
|
|
89
|
-
display_name = None
|
|
88
|
+
name: str
|
|
89
|
+
display_name: str | None = None
|
|
90
90
|
verify_ssl = True
|
|
91
91
|
|
|
92
92
|
# Define some allowed filters on the backend
|
|
@@ -166,6 +166,7 @@ class BaseBackend(object):
|
|
|
166
166
|
log.debug(f"Starting harvesting {self.source.name} ({self.source.url})…")
|
|
167
167
|
factory = HarvestJob if self.dryrun else HarvestJob.objects.create
|
|
168
168
|
self.job = factory(status="initialized", started=datetime.utcnow(), source=self.source)
|
|
169
|
+
self.remote_ids = set()
|
|
169
170
|
|
|
170
171
|
before_harvest_job.send(self)
|
|
171
172
|
# Set harvest_activity_user on global context during the run
|
|
@@ -190,6 +191,7 @@ class BaseBackend(object):
|
|
|
190
191
|
|
|
191
192
|
if any(i.status == "failed" for i in self.job.items):
|
|
192
193
|
self.job.status += "-errors"
|
|
194
|
+
|
|
193
195
|
except HarvestValidationError as e:
|
|
194
196
|
log.exception(
|
|
195
197
|
f'Harvesting validation failed for "{safe_unicode(self.source.name)}" ({self.source.backend})'
|
|
@@ -199,6 +201,15 @@ class BaseBackend(object):
|
|
|
199
201
|
|
|
200
202
|
error = HarvestError(message=safe_unicode(e))
|
|
201
203
|
self.job.errors.append(error)
|
|
204
|
+
except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as e:
|
|
205
|
+
log.warning(
|
|
206
|
+
f'Harvesting connection error for "{safe_unicode(self.source.name)}" ({self.source.backend}): {e}'
|
|
207
|
+
)
|
|
208
|
+
|
|
209
|
+
self.job.status = "failed"
|
|
210
|
+
|
|
211
|
+
error = HarvestError(message=safe_unicode(e), details=traceback.format_exc())
|
|
212
|
+
self.job.errors.append(error)
|
|
202
213
|
except Exception as e:
|
|
203
214
|
log.exception(
|
|
204
215
|
f'Harvesting failed for "{safe_unicode(self.source.name)}" ({self.source.backend})'
|
|
@@ -232,8 +243,13 @@ class BaseBackend(object):
|
|
|
232
243
|
|
|
233
244
|
current_app.logger.addHandler(log_catcher)
|
|
234
245
|
dataset = self.inner_process_dataset(item, **kwargs)
|
|
246
|
+
if dataset.harvest:
|
|
247
|
+
item.remote_url = dataset.harvest.remote_url
|
|
248
|
+
|
|
249
|
+
# Use `item.remote_id` from this point, because `inner_process_dataset` could have modified it.
|
|
250
|
+
|
|
251
|
+
self.ensure_unique_remote_id(item)
|
|
235
252
|
|
|
236
|
-
# Use `item.remote_id` because `inner_process_dataset` could have modified it.
|
|
237
253
|
dataset.harvest = self.update_dataset_harvest_info(dataset.harvest, item.remote_id)
|
|
238
254
|
dataset.archived = None
|
|
239
255
|
|
|
@@ -291,6 +307,10 @@ class BaseBackend(object):
|
|
|
291
307
|
raise HarvestSkipException("missing identifier")
|
|
292
308
|
|
|
293
309
|
dataservice = self.inner_process_dataservice(item, **kwargs)
|
|
310
|
+
if dataservice.harvest:
|
|
311
|
+
item.remote_url = dataservice.harvest.remote_url
|
|
312
|
+
|
|
313
|
+
self.ensure_unique_remote_id(item)
|
|
294
314
|
|
|
295
315
|
dataservice.harvest = self.update_dataservice_harvest_info(
|
|
296
316
|
dataservice.harvest, remote_id
|
|
@@ -325,6 +345,12 @@ class BaseBackend(object):
|
|
|
325
345
|
item.ended = datetime.utcnow()
|
|
326
346
|
self.save_job()
|
|
327
347
|
|
|
348
|
+
def ensure_unique_remote_id(self, item):
|
|
349
|
+
if item.remote_id in self.remote_ids:
|
|
350
|
+
raise HarvestValidationError(f"Identifier '{item.remote_id}' already exists")
|
|
351
|
+
|
|
352
|
+
self.remote_ids.add(item.remote_id)
|
|
353
|
+
|
|
328
354
|
def update_dataset_harvest_info(self, harvest: HarvestDatasetMetadata | None, remote_id: int):
|
|
329
355
|
if not harvest:
|
|
330
356
|
harvest = HarvestDatasetMetadata()
|
|
@@ -25,6 +25,7 @@ ALLOWED_RESOURCE_TYPES = ("dkan", "file", "file.upload", "api", "metadata")
|
|
|
25
25
|
|
|
26
26
|
|
|
27
27
|
class CkanBackend(BaseBackend):
|
|
28
|
+
name = "ckan"
|
|
28
29
|
display_name = "CKAN"
|
|
29
30
|
filters = (
|
|
30
31
|
HarvestFilter(_("Organization"), "organization", str, _("A CKAN Organization name")),
|
|
@@ -172,7 +173,10 @@ class CkanBackend(BaseBackend):
|
|
|
172
173
|
continue
|
|
173
174
|
elif key == "spatial":
|
|
174
175
|
# GeoJSON representation (Polygon or Point)
|
|
175
|
-
|
|
176
|
+
if isinstance(value, dict):
|
|
177
|
+
spatial_geom = value
|
|
178
|
+
else:
|
|
179
|
+
spatial_geom = json.loads(value)
|
|
176
180
|
elif key == "spatial-text":
|
|
177
181
|
# Textual representation of the extent / location
|
|
178
182
|
qs = GeoZone.objects(db.Q(name=value) | db.Q(slug=value))
|
|
@@ -212,12 +216,17 @@ class CkanBackend(BaseBackend):
|
|
|
212
216
|
dataset.spatial.zones = [spatial_zone]
|
|
213
217
|
|
|
214
218
|
if spatial_geom:
|
|
219
|
+
if "type" not in spatial_geom:
|
|
220
|
+
raise HarvestException(f"Spatial geometry {spatial_geom} without `type`")
|
|
221
|
+
|
|
215
222
|
if spatial_geom["type"] == "Polygon":
|
|
216
223
|
coordinates = [spatial_geom["coordinates"]]
|
|
217
224
|
elif spatial_geom["type"] == "MultiPolygon":
|
|
218
225
|
coordinates = spatial_geom["coordinates"]
|
|
219
226
|
else:
|
|
220
|
-
raise HarvestException(
|
|
227
|
+
raise HarvestException(
|
|
228
|
+
f"Unsupported spatial geometry {spatial_geom['type']} in {spatial_geom}. (Supported types are `Polygon` and `MultiPolygon`)"
|
|
229
|
+
)
|
|
221
230
|
dataset.spatial.geom = {"type": "MultiPolygon", "coordinates": coordinates}
|
|
222
231
|
|
|
223
232
|
if temporal_start and temporal_end:
|
|
@@ -265,5 +274,7 @@ class CkanBackend(BaseBackend):
|
|
|
265
274
|
|
|
266
275
|
|
|
267
276
|
class DkanBackend(CkanBackend):
|
|
277
|
+
name = "dkan"
|
|
278
|
+
display_name = "DKAN"
|
|
268
279
|
schema = dkan_schema
|
|
269
280
|
filters = []
|
udata/harvest/backends/dcat.py
CHANGED
|
@@ -63,6 +63,7 @@ def extract_graph(source, target, node, specs):
|
|
|
63
63
|
|
|
64
64
|
|
|
65
65
|
class DcatBackend(BaseBackend):
|
|
66
|
+
name = "dcat"
|
|
66
67
|
display_name = "DCAT"
|
|
67
68
|
|
|
68
69
|
def __init__(self, *args, **kwargs):
|
|
@@ -256,6 +257,7 @@ class CswDcatBackend(DcatBackend):
|
|
|
256
257
|
The parsing of items is then the same as for the DcatBackend.
|
|
257
258
|
"""
|
|
258
259
|
|
|
260
|
+
name = "csw-dcat"
|
|
259
261
|
display_name = "CSW-DCAT"
|
|
260
262
|
|
|
261
263
|
# CSW_REQUEST is based on:
|
|
@@ -424,6 +426,7 @@ class CswIso19139DcatBackend(CswDcatBackend):
|
|
|
424
426
|
The parsing of items is then the same as for the DcatBackend.
|
|
425
427
|
"""
|
|
426
428
|
|
|
429
|
+
name = "csw-iso-19139"
|
|
427
430
|
display_name = "CSW-ISO-19139"
|
|
428
431
|
|
|
429
432
|
extra_configs = (
|
udata/harvest/backends/maaf.py
CHANGED
udata/harvest/commands.py
CHANGED
|
@@ -2,7 +2,9 @@ import logging
|
|
|
2
2
|
|
|
3
3
|
import click
|
|
4
4
|
|
|
5
|
-
from udata.commands import cli
|
|
5
|
+
from udata.commands import KO, OK, cli, green, red
|
|
6
|
+
from udata.harvest.backends import get_all_backends, is_backend_enabled
|
|
7
|
+
from udata.models import Dataset
|
|
6
8
|
|
|
7
9
|
from . import actions
|
|
8
10
|
|
|
@@ -89,9 +91,10 @@ def sources(scheduled=False):
|
|
|
89
91
|
@grp.command()
|
|
90
92
|
def backends():
|
|
91
93
|
"""List available backends"""
|
|
92
|
-
|
|
93
|
-
for backend in
|
|
94
|
-
|
|
94
|
+
print("Available backends:")
|
|
95
|
+
for backend in get_all_backends().values():
|
|
96
|
+
status = green(OK) if is_backend_enabled(backend) else red(KO)
|
|
97
|
+
click.echo("{0} {1} ({2})".format(status, backend.display_name, backend.name))
|
|
95
98
|
|
|
96
99
|
|
|
97
100
|
@grp.command()
|
|
@@ -154,3 +157,35 @@ def attach(domain, filename):
|
|
|
154
157
|
log.info("Attaching datasets for domain %s", domain)
|
|
155
158
|
result = actions.attach(domain, filename)
|
|
156
159
|
log.info("Attached %s datasets to %s", result.success, domain)
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
@grp.command()
|
|
163
|
+
@click.argument("dataset_id")
|
|
164
|
+
def detach(dataset_id):
|
|
165
|
+
"""
|
|
166
|
+
Detach a dataset_id from its harvest source
|
|
167
|
+
|
|
168
|
+
The dataset will be cleaned from harvested information
|
|
169
|
+
"""
|
|
170
|
+
log.info(f"Detaching dataset {dataset_id}")
|
|
171
|
+
dataset = Dataset.get(dataset_id)
|
|
172
|
+
actions.detach(dataset)
|
|
173
|
+
log.info("Done")
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
@grp.command()
|
|
177
|
+
@click.argument("identifier")
|
|
178
|
+
def detach_all_from_source(identifier):
|
|
179
|
+
"""
|
|
180
|
+
Detach all datasets from a harvest source
|
|
181
|
+
|
|
182
|
+
All the datasets will be cleaned from harvested information.
|
|
183
|
+
Make sure the harvest source won't create new duplicate datasets,
|
|
184
|
+
either by deactivating it or filtering its scope, etc.
|
|
185
|
+
"""
|
|
186
|
+
log.info(f"Detaching datasets from harvest source {identifier}")
|
|
187
|
+
count = actions.detach_all_from_source(actions.get_source(identifier))
|
|
188
|
+
log.info(f"Detached {count} datasets")
|
|
189
|
+
log.warning(
|
|
190
|
+
"Make sure the harvest source won't create new duplicate datasets, either by deactivating it or filtering its scope, etc."
|
|
191
|
+
)
|