udata 13.0.1.dev12__py3-none-any.whl → 14.4.1.dev7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of udata might be problematic. Click here for more details.

Files changed (177) hide show
  1. udata/api/__init__.py +2 -8
  2. udata/api_fields.py +35 -4
  3. udata/app.py +30 -50
  4. udata/auth/__init__.py +29 -6
  5. udata/auth/forms.py +8 -6
  6. udata/auth/views.py +6 -3
  7. udata/commands/__init__.py +2 -14
  8. udata/commands/db.py +13 -25
  9. udata/commands/info.py +0 -16
  10. udata/commands/serve.py +3 -11
  11. udata/commands/tests/test_fixtures.py +9 -9
  12. udata/core/access_type/api.py +1 -1
  13. udata/core/access_type/constants.py +12 -8
  14. udata/core/activity/api.py +5 -6
  15. udata/core/avatars/api.py +43 -0
  16. udata/core/avatars/test_avatar_api.py +30 -0
  17. udata/core/badges/tests/test_commands.py +6 -6
  18. udata/core/csv.py +5 -0
  19. udata/core/dataservices/models.py +15 -3
  20. udata/core/dataservices/tasks.py +7 -0
  21. udata/core/dataset/api.py +2 -0
  22. udata/core/dataset/models.py +2 -2
  23. udata/core/dataset/permissions.py +31 -0
  24. udata/core/dataset/tasks.py +50 -10
  25. udata/core/discussions/models.py +1 -0
  26. udata/core/metrics/__init__.py +0 -6
  27. udata/core/organization/api.py +8 -5
  28. udata/core/organization/mails.py +1 -1
  29. udata/core/organization/models.py +9 -1
  30. udata/core/organization/notifications.py +84 -0
  31. udata/core/organization/permissions.py +1 -1
  32. udata/core/organization/tasks.py +3 -0
  33. udata/core/pages/tests/test_api.py +32 -0
  34. udata/core/post/api.py +24 -69
  35. udata/core/post/models.py +84 -16
  36. udata/core/post/tests/test_api.py +24 -1
  37. udata/core/reports/api.py +18 -0
  38. udata/core/reports/models.py +42 -2
  39. udata/core/reuse/models.py +1 -1
  40. udata/core/reuse/tasks.py +7 -0
  41. udata/core/site/models.py +2 -6
  42. udata/core/spatial/commands.py +2 -4
  43. udata/core/spatial/forms.py +2 -2
  44. udata/core/spatial/models.py +0 -10
  45. udata/core/spatial/tests/test_api.py +1 -36
  46. udata/core/user/models.py +15 -2
  47. udata/cors.py +2 -5
  48. udata/db/migrations.py +279 -0
  49. udata/features/notifications/api.py +7 -18
  50. udata/features/notifications/models.py +56 -0
  51. udata/features/notifications/tasks.py +25 -0
  52. udata/flask_mongoengine/engine.py +0 -4
  53. udata/frontend/__init__.py +3 -122
  54. udata/frontend/markdown.py +2 -1
  55. udata/harvest/actions.py +24 -9
  56. udata/harvest/api.py +30 -22
  57. udata/harvest/backends/__init__.py +21 -9
  58. udata/harvest/backends/base.py +29 -3
  59. udata/harvest/backends/ckan/harvesters.py +13 -2
  60. udata/harvest/backends/dcat.py +3 -0
  61. udata/harvest/backends/maaf.py +1 -0
  62. udata/harvest/commands.py +39 -4
  63. udata/harvest/filters.py +17 -6
  64. udata/harvest/forms.py +9 -6
  65. udata/harvest/models.py +16 -0
  66. udata/harvest/permissions.py +27 -0
  67. udata/harvest/tasks.py +3 -5
  68. udata/harvest/tests/ckan/test_ckan_backend.py +35 -2
  69. udata/harvest/tests/ckan/test_ckan_backend_errors.py +1 -1
  70. udata/harvest/tests/ckan/test_ckan_backend_filters.py +1 -1
  71. udata/harvest/tests/ckan/test_dkan_backend.py +1 -1
  72. udata/harvest/tests/dcat/udata.xml +6 -6
  73. udata/harvest/tests/factories.py +1 -1
  74. udata/harvest/tests/test_actions.py +63 -8
  75. udata/harvest/tests/test_api.py +278 -123
  76. udata/harvest/tests/test_base_backend.py +88 -1
  77. udata/harvest/tests/test_dcat_backend.py +60 -13
  78. udata/harvest/tests/test_filters.py +6 -0
  79. udata/i18n.py +11 -273
  80. udata/mail.py +5 -1
  81. udata/migrations/2025-10-31-create-membership-request-notifications.py +55 -0
  82. udata/migrations/2025-11-13-delete-user-email-index.py +25 -0
  83. udata/migrations/2025-12-04-add-uuid-to-discussion-messages.py +28 -0
  84. udata/models/__init__.py +0 -8
  85. udata/mongo/slug_fields.py +1 -1
  86. udata/rdf.py +45 -6
  87. udata/routing.py +2 -10
  88. udata/sentry.py +4 -10
  89. udata/settings.py +23 -17
  90. udata/tasks.py +4 -3
  91. udata/templates/mail/message.html +5 -31
  92. udata/tests/__init__.py +28 -12
  93. udata/tests/api/__init__.py +108 -21
  94. udata/tests/api/test_activities_api.py +36 -0
  95. udata/tests/api/test_auth_api.py +121 -95
  96. udata/tests/api/test_base_api.py +7 -4
  97. udata/tests/api/test_dataservices_api.py +29 -1
  98. udata/tests/api/test_datasets_api.py +45 -21
  99. udata/tests/api/test_organizations_api.py +192 -197
  100. udata/tests/api/test_reports_api.py +157 -0
  101. udata/tests/api/test_reuses_api.py +147 -147
  102. udata/tests/api/test_security_api.py +12 -12
  103. udata/tests/api/test_swagger.py +4 -4
  104. udata/tests/api/test_tags_api.py +8 -8
  105. udata/tests/api/test_user_api.py +13 -1
  106. udata/tests/apiv2/test_swagger.py +4 -4
  107. udata/tests/apiv2/test_topics.py +1 -1
  108. udata/tests/cli/test_cli_base.py +8 -9
  109. udata/tests/dataset/test_dataset_commands.py +4 -4
  110. udata/tests/dataset/test_dataset_model.py +66 -26
  111. udata/tests/dataset/test_dataset_rdf.py +99 -5
  112. udata/tests/dataset/test_resource_preview.py +0 -1
  113. udata/tests/frontend/test_auth.py +24 -1
  114. udata/tests/frontend/test_csv.py +0 -3
  115. udata/tests/helpers.py +37 -27
  116. udata/tests/organization/test_notifications.py +67 -2
  117. udata/tests/plugin.py +6 -261
  118. udata/tests/site/test_site_csv_exports.py +22 -10
  119. udata/tests/test_activity.py +9 -9
  120. udata/tests/test_cors.py +1 -1
  121. udata/tests/test_dcat_commands.py +2 -2
  122. udata/tests/test_discussions.py +5 -5
  123. udata/tests/test_migrations.py +181 -481
  124. udata/tests/test_notifications.py +15 -57
  125. udata/tests/test_notifications_task.py +43 -0
  126. udata/tests/test_owned.py +81 -1
  127. udata/tests/test_storages.py +25 -19
  128. udata/tests/test_topics.py +77 -61
  129. udata/tests/test_uris.py +33 -0
  130. udata/tests/workers/test_jobs_commands.py +23 -23
  131. udata/translations/ar/LC_MESSAGES/udata.mo +0 -0
  132. udata/translations/ar/LC_MESSAGES/udata.po +187 -108
  133. udata/translations/de/LC_MESSAGES/udata.mo +0 -0
  134. udata/translations/de/LC_MESSAGES/udata.po +187 -108
  135. udata/translations/es/LC_MESSAGES/udata.mo +0 -0
  136. udata/translations/es/LC_MESSAGES/udata.po +187 -108
  137. udata/translations/fr/LC_MESSAGES/udata.mo +0 -0
  138. udata/translations/fr/LC_MESSAGES/udata.po +188 -109
  139. udata/translations/it/LC_MESSAGES/udata.mo +0 -0
  140. udata/translations/it/LC_MESSAGES/udata.po +187 -108
  141. udata/translations/pt/LC_MESSAGES/udata.mo +0 -0
  142. udata/translations/pt/LC_MESSAGES/udata.po +187 -108
  143. udata/translations/sr/LC_MESSAGES/udata.mo +0 -0
  144. udata/translations/sr/LC_MESSAGES/udata.po +187 -108
  145. udata/translations/udata.pot +215 -106
  146. udata/uris.py +0 -2
  147. udata/utils.py +5 -0
  148. udata-14.4.1.dev7.dist-info/METADATA +109 -0
  149. {udata-13.0.1.dev12.dist-info → udata-14.4.1.dev7.dist-info}/RECORD +153 -166
  150. {udata-13.0.1.dev12.dist-info → udata-14.4.1.dev7.dist-info}/entry_points.txt +3 -5
  151. udata/core/followers/views.py +0 -15
  152. udata/core/post/forms.py +0 -30
  153. udata/entrypoints.py +0 -93
  154. udata/features/identicon/__init__.py +0 -0
  155. udata/features/identicon/api.py +0 -13
  156. udata/features/identicon/backends.py +0 -131
  157. udata/features/identicon/tests/__init__.py +0 -0
  158. udata/features/identicon/tests/test_backends.py +0 -18
  159. udata/features/territories/__init__.py +0 -49
  160. udata/features/territories/api.py +0 -25
  161. udata/features/territories/models.py +0 -51
  162. udata/flask_mongoengine/json.py +0 -38
  163. udata/migrations/__init__.py +0 -367
  164. udata/templates/mail/base.html +0 -105
  165. udata/templates/mail/base.txt +0 -6
  166. udata/templates/mail/button.html +0 -3
  167. udata/templates/mail/layouts/1-column.html +0 -19
  168. udata/templates/mail/layouts/2-columns.html +0 -20
  169. udata/templates/mail/layouts/center-panel.html +0 -16
  170. udata/tests/cli/test_db_cli.py +0 -68
  171. udata/tests/features/territories/__init__.py +0 -20
  172. udata/tests/features/territories/test_territories_api.py +0 -185
  173. udata/tests/frontend/test_hooks.py +0 -149
  174. udata-13.0.1.dev12.dist-info/METADATA +0 -133
  175. {udata-13.0.1.dev12.dist-info → udata-14.4.1.dev7.dist-info}/WHEEL +0 -0
  176. {udata-13.0.1.dev12.dist-info → udata-14.4.1.dev7.dist-info}/licenses/LICENSE +0 -0
  177. {udata-13.0.1.dev12.dist-info → udata-14.4.1.dev7.dist-info}/top_level.txt +0 -0
@@ -1,132 +1,13 @@
1
- import inspect
2
1
  import logging
3
- from importlib import import_module
4
2
 
5
- import pkg_resources
6
- from jinja2 import pass_context
7
- from markupsafe import Markup
8
-
9
- from udata import entrypoints
10
- from udata.i18n import I18nBlueprint
11
-
12
- from .markdown import UdataCleaner
13
3
  from .markdown import init_app as init_markdown
14
4
 
15
5
  log = logging.getLogger(__name__)
16
6
 
17
7
 
18
- hook = I18nBlueprint("hook", __name__)
19
-
20
- _template_hooks = {}
21
-
22
-
23
- @hook.app_template_global()
24
- def package_version(name: str) -> str:
25
- return pkg_resources.get_distribution(name).version
26
-
27
-
28
- @hook.app_template_filter()
29
- def avatar_placeholder(url):
30
- if url:
31
- return url
32
-
33
- return ""
34
-
35
-
36
- def _wrapper(func, name=None, when=None):
37
- name = name or func.__name__
38
- if name not in _template_hooks:
39
- _template_hooks[name] = []
40
- _template_hooks[name].append((func, when))
41
- return func
42
-
43
-
44
- def template_hook(func_or_name, when=None):
45
- if callable(func_or_name):
46
- return _wrapper(func_or_name)
47
- elif isinstance(func_or_name, str):
48
-
49
- def wrapper(func):
50
- return _wrapper(func, func_or_name, when=when)
51
-
52
- return wrapper
53
-
54
-
55
- def has_template_hook(name):
56
- return name in _template_hooks
57
-
58
-
59
- class HookRenderer:
60
- def __init__(self, funcs, ctx, *args, **kwargs):
61
- self.funcs = funcs
62
- self.ctx = ctx
63
- self.args = args
64
- self.kwargs = kwargs
65
-
66
- def __html__(self):
67
- return Markup(
68
- "".join(
69
- f(self.ctx, *self.args, **self.kwargs)
70
- for f, w in self.funcs
71
- if w is None or w(self.ctx)
72
- )
73
- )
74
-
75
- def __iter__(self):
76
- for func, when in self.funcs:
77
- if when is None or when(self.ctx):
78
- yield Markup(func(self.ctx, *self.args, **self.kwargs))
79
-
80
-
81
- @pass_context
82
- def render_template_hook(ctx, name, *args, **kwargs):
83
- if not has_template_hook(name):
84
- return ""
85
- return HookRenderer(_template_hooks[name], ctx, *args, **kwargs)
86
-
87
-
88
- @hook.app_context_processor
89
- def inject_hooks():
90
- return {
91
- "hook": render_template_hook,
92
- "has_hook": has_template_hook,
93
- }
94
-
95
-
96
- class SafeMarkup(Markup):
97
- """Markup object bypasses Jinja's escaping. This override allows to sanitize the resulting html."""
98
-
99
- def __new__(cls, base, *args, **kwargs):
100
- cleaner = UdataCleaner()
101
- return super().__new__(cls, cleaner.clean(base), *args, **kwargs)
102
-
103
-
104
- def _load_views(app, module):
105
- views = module if inspect.ismodule(module) else import_module(module)
106
- blueprint = getattr(views, "blueprint", None)
107
- if blueprint:
108
- app.register_blueprint(blueprint)
109
-
110
-
111
- VIEWS = ["core.storages"]
112
-
113
-
114
- def init_app(app, views=None):
115
- views = views or VIEWS
8
+ def init_app(app):
9
+ from udata.core.storages.views import blueprint as storage_blueprint
116
10
 
117
11
  init_markdown(app)
118
12
 
119
- for view in views:
120
- _load_views(app, "udata.{}.views".format(view))
121
-
122
- # Load hook blueprint
123
- app.register_blueprint(hook)
124
-
125
- # Load all plugins views and blueprints
126
- for module in entrypoints.get_enabled("udata.views", app).values():
127
- _load_views(app, module)
128
-
129
- # Load all plugins views and blueprints
130
- for module in entrypoints.get_enabled("udata.front", app).values():
131
- front_module = module if inspect.ismodule(module) else import_module(module)
132
- front_module.init_app(app)
13
+ app.register_blueprint(storage_blueprint)
@@ -7,8 +7,9 @@ import html2text
7
7
  import mistune
8
8
  from bleach.css_sanitizer import CSSSanitizer
9
9
  from bleach.linkifier import LinkifyFilter
10
- from flask import Markup, current_app, request
10
+ from flask import current_app, request
11
11
  from jinja2.filters import do_striptags, do_truncate
12
+ from markupsafe import Markup
12
13
  from werkzeug.local import LocalProxy
13
14
 
14
15
  from udata.i18n import _
udata/harvest/actions.py CHANGED
@@ -34,11 +34,6 @@ def get_source(ident):
34
34
  return HarvestSource.get(ident)
35
35
 
36
36
 
37
- def list_backends():
38
- """List all available backends"""
39
- return backends.get_all(current_app).values()
40
-
41
-
42
37
  def list_sources(owner=None, deleted=False):
43
38
  """List all harvest sources"""
44
39
  sources = HarvestSource.objects
@@ -177,7 +172,7 @@ def purge_jobs():
177
172
 
178
173
  def run(source: HarvestSource):
179
174
  """Launch or resume an harvesting for a given source if none is running"""
180
- cls = backends.get(current_app, source.backend)
175
+ cls = backends.get_backend(source.backend)
181
176
  backend = cls(source)
182
177
  backend.harvest()
183
178
 
@@ -189,7 +184,7 @@ def launch(source: HarvestSource):
189
184
 
190
185
  def preview(source: HarvestSource):
191
186
  """Preview an harvesting for a given source"""
192
- cls = backends.get(current_app, source.backend)
187
+ cls = backends.get_backend(source.backend)
193
188
  max_items = current_app.config["HARVEST_PREVIEW_MAX_ITEMS"]
194
189
  backend = cls(source, dryrun=True, max_items=max_items)
195
190
  return backend.harvest()
@@ -226,7 +221,7 @@ def preview_from_config(
226
221
  active=active,
227
222
  autoarchive=autoarchive,
228
223
  )
229
- cls = backends.get(current_app, source.backend)
224
+ cls = backends.get_backend(source.backend)
230
225
  max_items = current_app.config["HARVEST_PREVIEW_MAX_ITEMS"]
231
226
  backend = cls(source, dryrun=True, max_items=max_items)
232
227
  return backend.harvest()
@@ -259,7 +254,7 @@ def schedule(
259
254
  source.modify(
260
255
  periodic_task=PeriodicTask.objects.create(
261
256
  task="harvest",
262
- name="Harvest {0}".format(source.name),
257
+ name=f"Harvest {source.name} ({source.id})",
263
258
  description="Periodic Harvesting",
264
259
  enabled=True,
265
260
  args=[str(source.id)],
@@ -322,3 +317,23 @@ def attach(domain, filename):
322
317
  count += 1
323
318
 
324
319
  return AttachResult(count, errors)
320
+
321
+
322
+ def detach(dataset: Dataset):
323
+ """Detach a dataset from its harvest source
324
+
325
+ The dataset will be cleaned from harvested information
326
+ and will no longer be updated or archived by harvesting.
327
+ """
328
+ dataset.harvest = None
329
+ for resource in dataset.resources:
330
+ resource.harvest = None
331
+ dataset.save()
332
+
333
+
334
+ def detach_all_from_source(source: HarvestSource):
335
+ """Detach all datasets linked to a harvest source"""
336
+ datasets = Dataset.objects.filter(harvest__source_id=str(source.id))
337
+ for dataset in datasets:
338
+ detach(dataset)
339
+ return len(datasets)
udata/harvest/api.py CHANGED
@@ -6,10 +6,10 @@ from udata.api import API, api, fields
6
6
  from udata.auth import admin_permission
7
7
  from udata.core.dataservices.models import Dataservice
8
8
  from udata.core.dataset.api_fields import dataset_fields, dataset_ref_fields
9
- from udata.core.dataset.permissions import OwnablePermission
10
9
  from udata.core.organization.api_fields import org_ref_fields
11
10
  from udata.core.organization.permissions import EditOrganizationPermission
12
11
  from udata.core.user.api_fields import user_ref_fields
12
+ from udata.harvest.backends import get_enabled_backends
13
13
 
14
14
  from . import actions
15
15
  from .forms import HarvestSourceForm, HarvestSourceValidationForm
@@ -25,10 +25,6 @@ from .models import (
25
25
  ns = api.namespace("harvest", "Harvest related operations")
26
26
 
27
27
 
28
- def backends_ids():
29
- return [b.name for b in actions.list_backends()]
30
-
31
-
32
28
  error_fields = api.model(
33
29
  "HarvestError",
34
30
  {
@@ -58,6 +54,7 @@ item_fields = api.model(
58
54
  "HarvestItem",
59
55
  {
60
56
  "remote_id": fields.String(description="The item remote ID to process", required=True),
57
+ "remote_url": fields.String(description="The item remote url (if available)"),
61
58
  "dataset": fields.Nested(
62
59
  dataset_ref_fields, description="The processed dataset", allow_null=True
63
60
  ),
@@ -118,6 +115,18 @@ validation_fields = api.model(
118
115
  },
119
116
  )
120
117
 
118
+ source_permissions_fields = api.model(
119
+ "HarvestSourcePermissions",
120
+ {
121
+ "edit": fields.Permission(),
122
+ "delete": fields.Permission(),
123
+ "run": fields.Permission(),
124
+ "preview": fields.Permission(),
125
+ "validate": fields.Permission(),
126
+ "schedule": fields.Permission(),
127
+ },
128
+ )
129
+
121
130
  source_fields = api.model(
122
131
  "HarvestSource",
123
132
  {
@@ -126,7 +135,9 @@ source_fields = api.model(
126
135
  "description": fields.Markdown(description="The source description"),
127
136
  "url": fields.String(description="The source base URL", required=True),
128
137
  "backend": fields.String(
129
- description="The source backend", enum=backends_ids, required=True
138
+ description="The source backend",
139
+ enum=lambda: list(get_enabled_backends().keys()),
140
+ required=True,
130
141
  ),
131
142
  "config": fields.Raw(description="The configuration as key-value pairs"),
132
143
  "created_at": fields.ISODateTime(
@@ -154,6 +165,7 @@ source_fields = api.model(
154
165
  "schedule": fields.String(
155
166
  description="The source schedule (interval or cron expression)", readonly=True
156
167
  ),
168
+ "permissions": fields.Nested(source_permissions_fields, readonly=True),
157
169
  },
158
170
  )
159
171
 
@@ -314,7 +326,7 @@ class SourceAPI(API):
314
326
  @api.marshal_with(source_fields)
315
327
  def put(self, source: HarvestSource):
316
328
  """Update a harvest source"""
317
- OwnablePermission(source).test()
329
+ source.permissions["edit"].test()
318
330
  form = api.validate(HarvestSourceForm, source)
319
331
  source = actions.update_source(source, form.data)
320
332
  return source
@@ -323,18 +335,19 @@ class SourceAPI(API):
323
335
  @api.doc("delete_harvest_source")
324
336
  @api.marshal_with(source_fields)
325
337
  def delete(self, source: HarvestSource):
326
- OwnablePermission(source).test()
338
+ source.permissions["delete"].test()
327
339
  return actions.delete_source(source), 204
328
340
 
329
341
 
330
342
  @ns.route("/source/<harvest_source:source>/validate/", endpoint="validate_harvest_source")
331
343
  class ValidateSourceAPI(API):
332
344
  @api.doc("validate_harvest_source")
333
- @api.secure(admin_permission)
345
+ @api.secure
334
346
  @api.expect(validation_fields)
335
347
  @api.marshal_with(source_fields)
336
348
  def post(self, source: HarvestSource):
337
349
  """Validate or reject an harvest source"""
350
+ source.permissions["validate"].test()
338
351
  form = api.validate(HarvestSourceValidationForm)
339
352
  if form.state.data == VALIDATION_ACCEPTED:
340
353
  return actions.validate_source(source, form.comment.data)
@@ -355,7 +368,7 @@ class RunSourceAPI(API):
355
368
  "Cannot run source manually. Please contact the platform if you need to reschedule the harvester.",
356
369
  )
357
370
 
358
- OwnablePermission(source).test()
371
+ source.permissions["run"].test()
359
372
 
360
373
  if source.validation.state != VALIDATION_ACCEPTED:
361
374
  api.abort(400, "Source is not validated. Please validate the source before running.")
@@ -368,11 +381,12 @@ class RunSourceAPI(API):
368
381
  @ns.route("/source/<harvest_source:source>/schedule/", endpoint="schedule_harvest_source")
369
382
  class ScheduleSourceAPI(API):
370
383
  @api.doc("schedule_harvest_source")
371
- @api.secure(admin_permission)
384
+ @api.secure
372
385
  @api.expect((str, "A cron expression"))
373
386
  @api.marshal_with(source_fields)
374
387
  def post(self, source: HarvestSource):
375
388
  """Schedule an harvest source"""
389
+ source.permissions["schedule"].test()
376
390
  # Handle both syntax: quoted and unquoted
377
391
  try:
378
392
  data = request.json
@@ -381,10 +395,11 @@ class ScheduleSourceAPI(API):
381
395
  return actions.schedule(source, data)
382
396
 
383
397
  @api.doc("unschedule_harvest_source")
384
- @api.secure(admin_permission)
398
+ @api.secure
385
399
  @api.marshal_with(source_fields)
386
400
  def delete(self, source: HarvestSource):
387
401
  """Unschedule an harvest source"""
402
+ source.permissions["schedule"].test()
388
403
  return actions.unschedule(source), 204
389
404
 
390
405
 
@@ -409,6 +424,7 @@ class PreviewSourceAPI(API):
409
424
  @api.marshal_with(preview_job_fields)
410
425
  def get(self, source: HarvestSource):
411
426
  """Preview a single harvest source given an ID or a slug"""
427
+ source.permissions["preview"].test()
412
428
  return actions.preview(source)
413
429
 
414
430
 
@@ -438,7 +454,7 @@ class JobAPI(API):
438
454
  @api.expect(parser)
439
455
  @api.marshal_with(job_fields)
440
456
  def get(self, ident):
441
- """List all jobs for a given source"""
457
+ """Get a single job given an ID"""
442
458
  return actions.get_job(ident)
443
459
 
444
460
 
@@ -457,15 +473,7 @@ class ListBackendsAPI(API):
457
473
  "features": [f.as_dict() for f in b.features],
458
474
  "extra_configs": [f.as_dict() for f in b.extra_configs],
459
475
  }
460
- for b in actions.list_backends()
476
+ for b in get_enabled_backends().values()
461
477
  ],
462
478
  key=lambda b: b["label"],
463
479
  )
464
-
465
-
466
- @ns.route("/job_status/", endpoint="havest_job_status")
467
- class ListHarvesterAPI(API):
468
- @api.doc(model=[str])
469
- def get(self):
470
- """List all available harvesters"""
471
- return actions.list_backends()
@@ -1,17 +1,29 @@
1
- from udata.entrypoints import EntrypointError, get_enabled
1
+ from fnmatch import fnmatch
2
+ from importlib.metadata import entry_points
2
3
 
4
+ from flask import current_app
3
5
 
4
- def get(app, name):
5
- """Get a backend given its name"""
6
- backend = get_all(app).get(name)
6
+ from .base import BaseBackend, HarvestExtraConfig, HarvestFeature, HarvestFilter # noqa
7
+
8
+
9
+ def get_backend(name: str) -> type[BaseBackend] | None:
10
+ backend = get_enabled_backends().get(name)
7
11
  if not backend:
8
- msg = 'Harvest backend "{0}" is not registered'.format(name)
9
- raise EntrypointError(msg)
12
+ raise ValueError(f"Backend {name} unknown. Make sure it is declared in HARVESTER_BACKENDS.")
10
13
  return backend
11
14
 
12
15
 
13
- def get_all(app):
14
- return get_enabled("udata.harvesters", app)
16
+ def get_all_backends() -> dict[str, type[BaseBackend]]:
17
+ # Note that we use the `BaseBackend.name` and not `ep.name`. The entrypoint name
18
+ # is not used anymore.
19
+ return {ep.load().name: ep.load() for ep in entry_points(group="udata.harvesters")}
20
+
21
+
22
+ def is_backend_enabled(backend: type[BaseBackend]) -> bool:
23
+ return any(fnmatch(backend.name, g) for g in current_app.config["HARVESTER_BACKENDS"])
15
24
 
16
25
 
17
- from .base import BaseBackend, HarvestFeature, HarvestFilter, HarvestExtraConfig # noqa
26
+ def get_enabled_backends() -> dict[str, type[BaseBackend]]:
27
+ return {
28
+ name: backend for name, backend in get_all_backends().items() if is_backend_enabled(backend)
29
+ }
@@ -85,8 +85,8 @@ class BaseBackend(object):
85
85
  Also provides a few helpers needed on all or some backends.
86
86
  """
87
87
 
88
- name = None
89
- display_name = None
88
+ name: str
89
+ display_name: str | None = None
90
90
  verify_ssl = True
91
91
 
92
92
  # Define some allowed filters on the backend
@@ -166,6 +166,7 @@ class BaseBackend(object):
166
166
  log.debug(f"Starting harvesting {self.source.name} ({self.source.url})…")
167
167
  factory = HarvestJob if self.dryrun else HarvestJob.objects.create
168
168
  self.job = factory(status="initialized", started=datetime.utcnow(), source=self.source)
169
+ self.remote_ids = set()
169
170
 
170
171
  before_harvest_job.send(self)
171
172
  # Set harvest_activity_user on global context during the run
@@ -190,6 +191,7 @@ class BaseBackend(object):
190
191
 
191
192
  if any(i.status == "failed" for i in self.job.items):
192
193
  self.job.status += "-errors"
194
+
193
195
  except HarvestValidationError as e:
194
196
  log.exception(
195
197
  f'Harvesting validation failed for "{safe_unicode(self.source.name)}" ({self.source.backend})'
@@ -199,6 +201,15 @@ class BaseBackend(object):
199
201
 
200
202
  error = HarvestError(message=safe_unicode(e))
201
203
  self.job.errors.append(error)
204
+ except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as e:
205
+ log.warning(
206
+ f'Harvesting connection error for "{safe_unicode(self.source.name)}" ({self.source.backend}): {e}'
207
+ )
208
+
209
+ self.job.status = "failed"
210
+
211
+ error = HarvestError(message=safe_unicode(e), details=traceback.format_exc())
212
+ self.job.errors.append(error)
202
213
  except Exception as e:
203
214
  log.exception(
204
215
  f'Harvesting failed for "{safe_unicode(self.source.name)}" ({self.source.backend})'
@@ -232,8 +243,13 @@ class BaseBackend(object):
232
243
 
233
244
  current_app.logger.addHandler(log_catcher)
234
245
  dataset = self.inner_process_dataset(item, **kwargs)
246
+ if dataset.harvest:
247
+ item.remote_url = dataset.harvest.remote_url
248
+
249
+ # Use `item.remote_id` from this point, because `inner_process_dataset` could have modified it.
250
+
251
+ self.ensure_unique_remote_id(item)
235
252
 
236
- # Use `item.remote_id` because `inner_process_dataset` could have modified it.
237
253
  dataset.harvest = self.update_dataset_harvest_info(dataset.harvest, item.remote_id)
238
254
  dataset.archived = None
239
255
 
@@ -291,6 +307,10 @@ class BaseBackend(object):
291
307
  raise HarvestSkipException("missing identifier")
292
308
 
293
309
  dataservice = self.inner_process_dataservice(item, **kwargs)
310
+ if dataservice.harvest:
311
+ item.remote_url = dataservice.harvest.remote_url
312
+
313
+ self.ensure_unique_remote_id(item)
294
314
 
295
315
  dataservice.harvest = self.update_dataservice_harvest_info(
296
316
  dataservice.harvest, remote_id
@@ -325,6 +345,12 @@ class BaseBackend(object):
325
345
  item.ended = datetime.utcnow()
326
346
  self.save_job()
327
347
 
348
+ def ensure_unique_remote_id(self, item):
349
+ if item.remote_id in self.remote_ids:
350
+ raise HarvestValidationError(f"Identifier '{item.remote_id}' already exists")
351
+
352
+ self.remote_ids.add(item.remote_id)
353
+
328
354
  def update_dataset_harvest_info(self, harvest: HarvestDatasetMetadata | None, remote_id: int):
329
355
  if not harvest:
330
356
  harvest = HarvestDatasetMetadata()
@@ -25,6 +25,7 @@ ALLOWED_RESOURCE_TYPES = ("dkan", "file", "file.upload", "api", "metadata")
25
25
 
26
26
 
27
27
  class CkanBackend(BaseBackend):
28
+ name = "ckan"
28
29
  display_name = "CKAN"
29
30
  filters = (
30
31
  HarvestFilter(_("Organization"), "organization", str, _("A CKAN Organization name")),
@@ -172,7 +173,10 @@ class CkanBackend(BaseBackend):
172
173
  continue
173
174
  elif key == "spatial":
174
175
  # GeoJSON representation (Polygon or Point)
175
- spatial_geom = json.loads(value)
176
+ if isinstance(value, dict):
177
+ spatial_geom = value
178
+ else:
179
+ spatial_geom = json.loads(value)
176
180
  elif key == "spatial-text":
177
181
  # Textual representation of the extent / location
178
182
  qs = GeoZone.objects(db.Q(name=value) | db.Q(slug=value))
@@ -212,12 +216,17 @@ class CkanBackend(BaseBackend):
212
216
  dataset.spatial.zones = [spatial_zone]
213
217
 
214
218
  if spatial_geom:
219
+ if "type" not in spatial_geom:
220
+ raise HarvestException(f"Spatial geometry {spatial_geom} without `type`")
221
+
215
222
  if spatial_geom["type"] == "Polygon":
216
223
  coordinates = [spatial_geom["coordinates"]]
217
224
  elif spatial_geom["type"] == "MultiPolygon":
218
225
  coordinates = spatial_geom["coordinates"]
219
226
  else:
220
- raise HarvestException("Unsupported spatial geometry")
227
+ raise HarvestException(
228
+ f"Unsupported spatial geometry {spatial_geom['type']} in {spatial_geom}. (Supported types are `Polygon` and `MultiPolygon`)"
229
+ )
221
230
  dataset.spatial.geom = {"type": "MultiPolygon", "coordinates": coordinates}
222
231
 
223
232
  if temporal_start and temporal_end:
@@ -265,5 +274,7 @@ class CkanBackend(BaseBackend):
265
274
 
266
275
 
267
276
  class DkanBackend(CkanBackend):
277
+ name = "dkan"
278
+ display_name = "DKAN"
268
279
  schema = dkan_schema
269
280
  filters = []
@@ -63,6 +63,7 @@ def extract_graph(source, target, node, specs):
63
63
 
64
64
 
65
65
  class DcatBackend(BaseBackend):
66
+ name = "dcat"
66
67
  display_name = "DCAT"
67
68
 
68
69
  def __init__(self, *args, **kwargs):
@@ -256,6 +257,7 @@ class CswDcatBackend(DcatBackend):
256
257
  The parsing of items is then the same as for the DcatBackend.
257
258
  """
258
259
 
260
+ name = "csw-dcat"
259
261
  display_name = "CSW-DCAT"
260
262
 
261
263
  # CSW_REQUEST is based on:
@@ -424,6 +426,7 @@ class CswIso19139DcatBackend(CswDcatBackend):
424
426
  The parsing of items is then the same as for the DcatBackend.
425
427
  """
426
428
 
429
+ name = "csw-iso-19139"
427
430
  display_name = "CSW-ISO-19139"
428
431
 
429
432
  extra_configs = (
@@ -129,6 +129,7 @@ def dictize(element):
129
129
 
130
130
 
131
131
  class MaafBackend(BaseBackend):
132
+ name = "maaf"
132
133
  display_name = "MAAF"
133
134
  verify_ssl = False
134
135
 
udata/harvest/commands.py CHANGED
@@ -2,7 +2,9 @@ import logging
2
2
 
3
3
  import click
4
4
 
5
- from udata.commands import cli
5
+ from udata.commands import KO, OK, cli, green, red
6
+ from udata.harvest.backends import get_all_backends, is_backend_enabled
7
+ from udata.models import Dataset
6
8
 
7
9
  from . import actions
8
10
 
@@ -89,9 +91,10 @@ def sources(scheduled=False):
89
91
  @grp.command()
90
92
  def backends():
91
93
  """List available backends"""
92
- log.info("Available backends:")
93
- for backend in actions.list_backends():
94
- log.info("%s (%s)", backend.name, backend.display_name or backend.name)
94
+ print("Available backends:")
95
+ for backend in get_all_backends().values():
96
+ status = green(OK) if is_backend_enabled(backend) else red(KO)
97
+ click.echo("{0} {1} ({2})".format(status, backend.display_name, backend.name))
95
98
 
96
99
 
97
100
  @grp.command()
@@ -154,3 +157,35 @@ def attach(domain, filename):
154
157
  log.info("Attaching datasets for domain %s", domain)
155
158
  result = actions.attach(domain, filename)
156
159
  log.info("Attached %s datasets to %s", result.success, domain)
160
+
161
+
162
+ @grp.command()
163
+ @click.argument("dataset_id")
164
+ def detach(dataset_id):
165
+ """
166
+ Detach a dataset_id from its harvest source
167
+
168
+ The dataset will be cleaned from harvested information
169
+ """
170
+ log.info(f"Detaching dataset {dataset_id}")
171
+ dataset = Dataset.get(dataset_id)
172
+ actions.detach(dataset)
173
+ log.info("Done")
174
+
175
+
176
+ @grp.command()
177
+ @click.argument("identifier")
178
+ def detach_all_from_source(identifier):
179
+ """
180
+ Detach all datasets from a harvest source
181
+
182
+ All the datasets will be cleaned from harvested information.
183
+ Make sure the harvest source won't create new duplicate datasets,
184
+ either by deactivating it or filtering its scope, etc.
185
+ """
186
+ log.info(f"Detaching datasets from harvest source {identifier}")
187
+ count = actions.detach_all_from_source(actions.get_source(identifier))
188
+ log.info(f"Detached {count} datasets")
189
+ log.warning(
190
+ "Make sure the harvest source won't create new duplicate datasets, either by deactivating it or filtering its scope, etc."
191
+ )