udata 10.8.1.dev36652__py2.py3-none-any.whl → 10.8.2__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of udata might be problematic. Click here for more details.

Files changed (79)
  1. udata/__init__.py +1 -1
  2. udata/app.py +0 -2
  3. udata/commands/db.py +22 -9
  4. udata/core/dataset/models.py +5 -3
  5. udata/core/discussions/api.py +2 -2
  6. udata/core/jobs/api.py +3 -3
  7. udata/core/metrics/helpers.py +10 -0
  8. udata/core/metrics/tasks.py +144 -1
  9. udata/core/organization/api.py +2 -2
  10. udata/core/post/api.py +1 -1
  11. udata/core/user/api.py +1 -1
  12. udata/features/identicon/api.py +1 -1
  13. udata/harvest/actions.py +24 -28
  14. udata/harvest/api.py +28 -36
  15. udata/harvest/backends/ckan/__init__.py +3 -0
  16. udata/harvest/backends/ckan/harvesters.py +274 -0
  17. udata/harvest/backends/ckan/schemas/__init__.py +0 -0
  18. udata/harvest/backends/ckan/schemas/ckan.py +86 -0
  19. udata/harvest/backends/ckan/schemas/dkan.py +98 -0
  20. udata/harvest/commands.py +7 -7
  21. udata/harvest/tasks.py +1 -1
  22. udata/harvest/tests/ckan/conftest.py +67 -0
  23. udata/harvest/tests/ckan/data/dkan-french-w-license.json +226 -0
  24. udata/harvest/tests/ckan/test_ckan_backend.py +697 -0
  25. udata/harvest/tests/ckan/test_ckan_backend_errors.py +140 -0
  26. udata/harvest/tests/ckan/test_ckan_backend_filters.py +130 -0
  27. udata/harvest/tests/ckan/test_dkan_backend.py +68 -0
  28. udata/harvest/tests/test_actions.py +27 -32
  29. udata/harvest/tests/test_api.py +23 -18
  30. udata/harvest/tests/test_dcat_backend.py +29 -29
  31. udata/migrations/2025-07-30-purge-old-harvest-dynamic-fields.py +29 -0
  32. udata/mongo/slug_fields.py +25 -8
  33. udata/routing.py +6 -0
  34. udata/static/chunks/{11.b6f741fcc366abfad9c4.js → 11.51d706fb9521c16976bc.js} +3 -3
  35. udata/static/chunks/{11.b6f741fcc366abfad9c4.js.map → 11.51d706fb9521c16976bc.js.map} +1 -1
  36. udata/static/chunks/{13.2d06442dd9a05d9777b5.js → 13.39e106d56f794ebd06a0.js} +2 -2
  37. udata/static/chunks/{13.2d06442dd9a05d9777b5.js.map → 13.39e106d56f794ebd06a0.js.map} +1 -1
  38. udata/static/chunks/{17.e8e4caaad5cb0cc0bacc.js → 17.70cbb4a91b002338007e.js} +2 -2
  39. udata/static/chunks/{17.e8e4caaad5cb0cc0bacc.js.map → 17.70cbb4a91b002338007e.js.map} +1 -1
  40. udata/static/chunks/{19.f03a102365af4315f9db.js → 19.a348a5fff8fe2801e52a.js} +3 -3
  41. udata/static/chunks/{19.f03a102365af4315f9db.js.map → 19.a348a5fff8fe2801e52a.js.map} +1 -1
  42. udata/static/chunks/{5.0fa1408dae4e76b87b2e.js → 5.343ca020a2d38cec1a14.js} +3 -3
  43. udata/static/chunks/{5.0fa1408dae4e76b87b2e.js.map → 5.343ca020a2d38cec1a14.js.map} +1 -1
  44. udata/static/chunks/{6.d663709d877baa44a71e.js → 6.a3b07de9dd2ca2d24e85.js} +3 -3
  45. udata/static/chunks/{6.d663709d877baa44a71e.js.map → 6.a3b07de9dd2ca2d24e85.js.map} +1 -1
  46. udata/static/chunks/{8.778091d55cd8ea39af6b.js → 8.462bb3029de008497675.js} +2 -2
  47. udata/static/chunks/{8.778091d55cd8ea39af6b.js.map → 8.462bb3029de008497675.js.map} +1 -1
  48. udata/static/common.js +1 -1
  49. udata/static/common.js.map +1 -1
  50. udata/tests/api/test_datasets_api.py +0 -46
  51. udata/tests/api/test_organizations_api.py +5 -0
  52. udata/tests/cli/test_db_cli.py +12 -0
  53. udata/tests/dataset/test_dataset_model.py +0 -16
  54. udata/tests/metrics/__init__.py +0 -0
  55. udata/tests/metrics/conftest.py +15 -0
  56. udata/tests/metrics/helpers.py +58 -0
  57. udata/tests/metrics/test_metrics.py +67 -0
  58. udata/tests/metrics/test_tasks.py +171 -0
  59. udata/translations/ar/LC_MESSAGES/udata.mo +0 -0
  60. udata/translations/ar/LC_MESSAGES/udata.po +72 -65
  61. udata/translations/de/LC_MESSAGES/udata.mo +0 -0
  62. udata/translations/de/LC_MESSAGES/udata.po +72 -65
  63. udata/translations/es/LC_MESSAGES/udata.mo +0 -0
  64. udata/translations/es/LC_MESSAGES/udata.po +72 -65
  65. udata/translations/fr/LC_MESSAGES/udata.mo +0 -0
  66. udata/translations/fr/LC_MESSAGES/udata.po +72 -65
  67. udata/translations/it/LC_MESSAGES/udata.mo +0 -0
  68. udata/translations/it/LC_MESSAGES/udata.po +72 -65
  69. udata/translations/pt/LC_MESSAGES/udata.mo +0 -0
  70. udata/translations/pt/LC_MESSAGES/udata.po +72 -65
  71. udata/translations/sr/LC_MESSAGES/udata.mo +0 -0
  72. udata/translations/sr/LC_MESSAGES/udata.po +72 -65
  73. udata/translations/udata.pot +74 -70
  74. {udata-10.8.1.dev36652.dist-info → udata-10.8.2.dist-info}/METADATA +16 -2
  75. {udata-10.8.1.dev36652.dist-info → udata-10.8.2.dist-info}/RECORD +79 -62
  76. {udata-10.8.1.dev36652.dist-info → udata-10.8.2.dist-info}/entry_points.txt +2 -0
  77. {udata-10.8.1.dev36652.dist-info → udata-10.8.2.dist-info}/LICENSE +0 -0
  78. {udata-10.8.1.dev36652.dist-info → udata-10.8.2.dist-info}/WHEEL +0 -0
  79. {udata-10.8.1.dev36652.dist-info → udata-10.8.2.dist-info}/top_level.txt +0 -0
udata/__init__.py CHANGED
@@ -4,5 +4,5 @@
4
4
  udata
5
5
  """
6
6
 
7
- __version__ = "10.8.1.dev"
7
+ __version__ = "10.8.2"
8
8
  __description__ = "Open data portal"
udata/app.py CHANGED
@@ -210,7 +210,6 @@ def register_extensions(app):
210
210
  routing,
211
211
  search,
212
212
  sentry,
213
- sitemap,
214
213
  tasks,
215
214
  )
216
215
 
@@ -225,7 +224,6 @@ def register_extensions(app):
225
224
  csrf.init_app(app)
226
225
  mail.init_app(app)
227
226
  search.init_app(app)
228
- sitemap.init_app(app)
229
227
  sentry.init_app(app)
230
228
  return app
231
229
 
udata/commands/db.py CHANGED
@@ -2,6 +2,7 @@ import collections
2
2
  import copy
3
3
  import logging
4
4
  import os
5
+ import sys
5
6
  import traceback
6
7
  from itertools import groupby
7
8
  from typing import Optional
@@ -312,15 +313,26 @@ def check_references(models_to_check):
312
313
  f"\t{model.__name__}#{obj.id} have a broken reference for `{reference['name']}`"
313
314
  )
314
315
  elif reference["type"] == "list":
315
- attr_list = getattr(obj, reference["name"], [])
316
- for i, sub in enumerate(attr_list):
317
- # If it's still an instance of DBRef it means that it failed to
318
- # dereference the ID.
319
- if isinstance(sub, DBRef):
320
- errors[model][key] += 1
321
- print_and_save(
322
- f"\t{model.__name__}#{obj.id} have a broken reference for {reference['name']}[{i}]"
323
- )
316
+ field_exists = (
317
+ f"{reference['name']}__exists" # Eg: "contact_points__exists"
318
+ )
319
+ if model.objects(id=obj.id, **{field_exists: True}).count() == 0:
320
+ # See https://github.com/MongoEngine/mongoengine/issues/267#issuecomment-283065318
321
+ # Setting it explicitly to an empty list actually removes the field, which it shouldn't.
322
+ errors[model][key] += 1
323
+ print_and_save(
324
+ f"\t{model.__name__}#{obj.id} have a non existing field `{reference['name']}`, instead of an empty list"
325
+ )
326
+ else:
327
+ attr_list = getattr(obj, reference["name"])
328
+ for i, sub in enumerate(attr_list):
329
+ # If it's still an instance of DBRef it means that it failed to
330
+ # dereference the ID.
331
+ if isinstance(sub, DBRef):
332
+ errors[model][key] += 1
333
+ print_and_save(
334
+ f"\t{model.__name__}#{obj.id} have a broken reference for {reference['name']}[{i}]"
335
+ )
324
336
  elif reference["type"] == "embed_list":
325
337
  p1, p2 = reference["name"].split("__")
326
338
  attr_list = getattr(obj, p1, [])
@@ -380,6 +392,7 @@ def check_references(models_to_check):
380
392
  sentry_sdk.capture_message(f"{total} integrity errors", "fatal")
381
393
  except ImportError:
382
394
  print("`sentry_sdk` not installed. The errors weren't reported")
395
+ sys.exit(1)
383
396
 
384
397
 
385
398
  @grp.command()
udata/core/dataset/models.py CHANGED
@@ -10,7 +10,6 @@ import requests
10
10
  from blinker import signal
11
11
  from dateutil.parser import parse as parse_dt
12
12
  from flask import current_app, url_for
13
- from mongoengine import DynamicEmbeddedDocument
14
13
  from mongoengine import ValidationError as MongoEngineValidationError
15
14
  from mongoengine.fields import DateTimeField
16
15
  from mongoengine.signals import post_save, pre_init, pre_save
@@ -78,7 +77,7 @@ def get_json_ld_extra(key, value):
78
77
  }
79
78
 
80
79
 
81
- class HarvestDatasetMetadata(DynamicEmbeddedDocument):
80
+ class HarvestDatasetMetadata(db.EmbeddedDocument):
82
81
  backend = db.StringField()
83
82
  created_at = db.DateTimeField()
84
83
  modified_at = db.DateTimeField()
@@ -91,12 +90,15 @@ class HarvestDatasetMetadata(DynamicEmbeddedDocument):
91
90
  dct_identifier = db.StringField()
92
91
  archived_at = db.DateTimeField()
93
92
  archived = db.StringField()
93
+ ckan_name = db.StringField()
94
+ ckan_source = db.StringField()
94
95
 
95
96
 
96
- class HarvestResourceMetadata(DynamicEmbeddedDocument):
97
+ class HarvestResourceMetadata(db.EmbeddedDocument):
97
98
  created_at = db.DateTimeField()
98
99
  modified_at = db.DateTimeField()
99
100
  uri = db.StringField()
101
+ dct_identifier = db.StringField()
100
102
 
101
103
 
102
104
  class Schema(db.EmbeddedDocument):
udata/core/discussions/api.py CHANGED
@@ -247,7 +247,7 @@ class DiscussionAPI(API):
247
247
  return "", 204
248
248
 
249
249
 
250
- @ns.route("/<id>/comments/<int:cidx>/spam", endpoint="discussion_comment_spam")
250
+ @ns.route("/<id>/comments/<int:cidx>/spam/", endpoint="discussion_comment_spam")
251
251
  @ns.doc(delete={"id": "unspam"})
252
252
  class DiscussionCommentSpamAPI(SpamAPIMixin):
253
253
  def get_model(self, id, cidx):
@@ -259,7 +259,7 @@ class DiscussionCommentSpamAPI(SpamAPIMixin):
259
259
  return discussion, discussion.discussion[cidx]
260
260
 
261
261
 
262
- @ns.route("/<id>/comments/<int:cidx>", endpoint="discussion_comment")
262
+ @ns.route("/<id>/comments/<int:cidx>/", endpoint="discussion_comment")
263
263
  class DiscussionCommentAPI(API):
264
264
  """
265
265
  Base class for a comment in a discussion thread.
udata/core/jobs/api.py CHANGED
@@ -101,7 +101,7 @@ class JobsAPI(API):
101
101
  return form.save(), 201
102
102
 
103
103
 
104
- @ns.route("/jobs/<string:id>", endpoint="job")
104
+ @ns.route("/jobs/<string:id>/", endpoint="job")
105
105
  @api.param("id", "A job ID")
106
106
  class JobAPI(API):
107
107
  def get_or_404(self, id):
@@ -139,7 +139,7 @@ class JobAPI(API):
139
139
  return "", 204
140
140
 
141
141
 
142
- @ns.route("/tasks/<string:id>", endpoint="task")
142
+ @ns.route("/tasks/<string:id>/", endpoint="task")
143
143
  class TaskAPI(API):
144
144
  @api.marshal_with(task_fields)
145
145
  def get(self, id):
@@ -159,7 +159,7 @@ class TaskAPI(API):
159
159
  return data
160
160
 
161
161
 
162
- @ns.route("/jobs/schedulables", endpoint="schedulable_jobs")
162
+ @ns.route("/jobs/schedulables/", endpoint="schedulable_jobs")
163
163
  class JobsReferenceAPI(API):
164
164
  @api.doc(model=[str])
165
165
  def get(self):
udata/core/metrics/helpers.py CHANGED
@@ -2,6 +2,7 @@ import logging
2
2
  from collections import OrderedDict
3
3
  from datetime import datetime, timedelta
4
4
  from typing import Dict, List, Union
5
+ from urllib.parse import urlencode
5
6
 
6
7
  import requests
7
8
  from bson import ObjectId
@@ -68,6 +69,15 @@ def get_metrics_for_model(
68
69
  return [{} for _ in range(len(metrics_labels))]
69
70
 
70
71
 
72
+ def get_download_url(model: str, id: Union[str, ObjectId, None]) -> str:
73
+ api_namespace = model + "s" if model != "site" else model
74
+ base_url = f"{current_app.config['METRICS_API']}/{api_namespace}/data/csv/"
75
+ args = {"metric_month__sort": "asc"}
76
+ if id:
77
+ args[f"{model}_id__exact"] = id
78
+ return f"{base_url}?{urlencode(args)}"
79
+
80
+
71
81
  def compute_monthly_aggregated_metrics(aggregation_res: CommandCursor) -> OrderedDict:
72
82
  monthly_metrics = OrderedDict((month, 0) for month in get_last_13_months())
73
83
  for monthly_count in aggregation_res:
udata/core/metrics/tasks.py CHANGED
@@ -1,9 +1,152 @@
1
+ import logging
2
+ import time
3
+ from functools import wraps
4
+ from typing import Dict, List
5
+
6
+ import requests
1
7
  from flask import current_app
2
8
 
9
+ from udata.core.dataservices.models import Dataservice
3
10
  from udata.core.metrics.signals import on_site_metrics_computed
4
- from udata.models import Site
11
+ from udata.models import CommunityResource, Dataset, Organization, Reuse, Site, db
5
12
  from udata.tasks import job
6
13
 
14
+ log = logging.getLogger(__name__)
15
+
16
+
17
+ def log_timing(func):
18
+ @wraps(func)
19
+ def timeit_wrapper(*args, **kwargs):
20
+ # Better log if we're using Python 3.9
21
+ name = func.__name__
22
+ model = name.removeprefix("update_") if hasattr(name, "removeprefix") else name
23
+
24
+ log.info(f"Processing {model}…")
25
+ start_time = time.perf_counter()
26
+ result = func(*args, **kwargs)
27
+ total_time = time.perf_counter() - start_time
28
+ log.info(f"Done in {total_time:.4f} seconds.")
29
+ return result
30
+
31
+ return timeit_wrapper
32
+
33
+
34
+ def save_model(model: db.Document, model_id: str, metrics: Dict[str, int]) -> None:
35
+ try:
36
+ result = model.objects(id=model_id).update(
37
+ **{f"set__metrics__{key}": value for key, value in metrics.items()}
38
+ )
39
+
40
+ if result is None:
41
+ log.debug(f"{model.__name__} not found", extra={"id": model_id})
42
+ except Exception as e:
43
+ log.exception(e)
44
+
45
+
46
+ def iterate_on_metrics(target: str, value_keys: List[str], page_size: int = 50) -> dict:
47
+ """
48
+ Yield all elements with non-zero values for the keys inside `value_keys`.
49
+ If you pass ['visit', 'download_resource'], it will do a `OR` and get
50
+ metrics where at least one of the two values is non-zero.
51
+ """
52
+ yielded = set()
53
+
54
+ for value_key in value_keys:
55
+ url = f"{current_app.config['METRICS_API']}/{target}_total/data/"
56
+ url += f"?{value_key}__greater=1&page_size={page_size}"
57
+
58
+ with requests.Session() as session:
59
+ while url is not None:
60
+ r = session.get(url, timeout=10)
61
+ r.raise_for_status()
62
+ data = r.json()
63
+
64
+ for row in data["data"]:
65
+ if row["__id"] not in yielded:
66
+ yielded.add(row["__id"])
67
+ yield row
68
+
69
+ url = data["links"].get("next")
70
+
71
+
72
+ @log_timing
73
+ def update_resources_and_community_resources():
74
+ for data in iterate_on_metrics("resources", ["download_resource"]):
75
+ if data["dataset_id"] is None:
76
+ save_model(
77
+ CommunityResource,
78
+ data["resource_id"],
79
+ {
80
+ "views": data["download_resource"],
81
+ },
82
+ )
83
+ else:
84
+ Dataset.objects(resources__id=data["resource_id"]).update(
85
+ **{"set__resources__$__metrics__views": data["download_resource"]}
86
+ )
87
+
88
+
89
+ @log_timing
90
+ def update_datasets():
91
+ for data in iterate_on_metrics("datasets", ["visit", "download_resource"]):
92
+ save_model(
93
+ Dataset,
94
+ data["dataset_id"],
95
+ {
96
+ "views": data["visit"],
97
+ "resources_downloads": data["download_resource"],
98
+ },
99
+ )
100
+
101
+
102
+ @log_timing
103
+ def update_dataservices():
104
+ for data in iterate_on_metrics("dataservices", ["visit"]):
105
+ save_model(
106
+ Dataservice,
107
+ data["dataservice_id"],
108
+ {
109
+ "views": data["visit"],
110
+ },
111
+ )
112
+
113
+
114
+ @log_timing
115
+ def update_reuses():
116
+ for data in iterate_on_metrics("reuses", ["visit"]):
117
+ save_model(Reuse, data["reuse_id"], {"views": data["visit"]})
118
+
119
+
120
+ @log_timing
121
+ def update_organizations():
122
+ # We're currently using visit_dataset as global metric for an orga
123
+ for data in iterate_on_metrics("organizations", ["visit_dataset"]):
124
+ save_model(
125
+ Organization,
126
+ data["organization_id"],
127
+ {
128
+ "views": data["visit_dataset"],
129
+ },
130
+ )
131
+
132
+
133
+ def update_metrics_for_models():
134
+ log.info("Starting…")
135
+ update_datasets()
136
+ update_resources_and_community_resources()
137
+ update_dataservices()
138
+ update_reuses()
139
+ update_organizations()
140
+
141
+
142
+ @job("update-metrics", route="low.metrics")
143
+ def update_metrics(self):
144
+ """Update udata objects metrics"""
145
+ if not current_app.config["METRICS_API"]:
146
+ log.error("You need to set METRICS_API to run update-metrics")
147
+ exit(1)
148
+ update_metrics_for_models()
149
+
7
150
 
8
151
  @job("compute-site-metrics")
9
152
  def compute_site_metrics(self):
udata/core/organization/api.py CHANGED
@@ -99,7 +99,7 @@ class OrgApiParser(ModelApiParser):
99
99
  if args.get("badge"):
100
100
  organizations = organizations.with_badge(args["badge"])
101
101
  if args.get("name"):
102
- organizations = organizations.filter(name=args["name"])
102
+ organizations = organizations.filter(name__iexact=args["name"])
103
103
  if args.get("business_number_id"):
104
104
  organizations = organizations.filter(business_number_id=args["business_number_id"])
105
105
  return organizations
@@ -449,7 +449,7 @@ class MembershipRefuseAPI(MembershipAPI):
449
449
  return {}, 200
450
450
 
451
451
 
452
- @ns.route("/<org:org>/member/<user:user>", endpoint="member", doc=common_doc)
452
+ @ns.route("/<org:org>/member/<user:user>/", endpoint="member", doc=common_doc)
453
453
  class MemberAPI(API):
454
454
  @api.secure
455
455
  @api.expect(member_fields)
udata/core/post/api.py CHANGED
@@ -168,7 +168,7 @@ class PostAPI(API):
168
168
  return "", 204
169
169
 
170
170
 
171
- @ns.route("/<post:post>/publish", endpoint="publish_post")
171
+ @ns.route("/<post:post>/publish/", endpoint="publish_post")
172
172
  class PublishPostAPI(API):
173
173
  @api.secure(admin_permission)
174
174
  @api.doc("publish_post")
udata/core/user/api.py CHANGED
@@ -194,7 +194,7 @@ class MyOrgDiscussionsAPI(API):
194
194
  return list(discussions)
195
195
 
196
196
 
197
- @me.route("/apikey", endpoint="my_apikey")
197
+ @me.route("/apikey/", endpoint="my_apikey")
198
198
  class ApiKeyAPI(API):
199
199
  @api.secure
200
200
  @api.doc("generate_apikey")
udata/features/identicon/api.py CHANGED
@@ -5,7 +5,7 @@ from . import backends
5
5
  ns = api.namespace("avatars", "Avatars")
6
6
 
7
7
 
8
- @ns.route("/<identifier>/<int:size>", endpoint="avatar")
8
+ @ns.route("/<identifier>/<int:size>/", endpoint="avatar")
9
9
  class IdenticonAPI(API):
10
10
  @api.doc("avatars")
11
11
  def get(self, identifier, size):
udata/harvest/actions.py CHANGED
@@ -29,6 +29,11 @@ log = logging.getLogger(__name__)
29
29
  DEFAULT_PAGE_SIZE = 10
30
30
 
31
31
 
32
+ def get_source(ident):
33
+ """Get an harvest source given its ID or its slug"""
34
+ return HarvestSource.get(ident)
35
+
36
+
32
37
  def list_backends():
33
38
  """List all available backends"""
34
39
  return backends.get_all(current_app).values()
@@ -44,11 +49,6 @@ def list_sources(owner=None, deleted=False):
44
49
  return list(sources)
45
50
 
46
51
 
47
- def get_source(ident):
48
- """Get an harvest source given its ID or its slug"""
49
- return HarvestSource.get(ident)
50
-
51
-
52
52
  def get_job(ident):
53
53
  """Get an harvest job given its ID"""
54
54
  return HarvestJob.objects.get(id=ident)
@@ -89,31 +89,28 @@ def create_source(
89
89
  return source
90
90
 
91
91
 
92
- def update_source(ident, data):
92
+ def update_source(source: HarvestSource, data):
93
93
  """Update an harvest source"""
94
- source = get_source(ident)
95
94
  source.modify(**data)
96
95
  signals.harvest_source_updated.send(source)
97
96
  return source
98
97
 
99
98
 
100
- def validate_source(ident, comment=None):
99
+ def validate_source(source: HarvestSource, comment=None):
101
100
  """Validate a source for automatic harvesting"""
102
- source = get_source(ident)
103
101
  source.validation.on = datetime.utcnow()
104
102
  source.validation.comment = comment
105
103
  source.validation.state = VALIDATION_ACCEPTED
106
104
  if current_user.is_authenticated:
107
105
  source.validation.by = current_user._get_current_object()
108
106
  source.save()
109
- schedule(ident, cron=current_app.config["HARVEST_DEFAULT_SCHEDULE"])
110
- launch(ident)
107
+ schedule(source, cron=current_app.config["HARVEST_DEFAULT_SCHEDULE"])
108
+ launch(source)
111
109
  return source
112
110
 
113
111
 
114
- def reject_source(ident, comment):
112
+ def reject_source(source: HarvestSource, comment):
115
113
  """Reject a source for automatic harvesting"""
116
- source = get_source(ident)
117
114
  source.validation.on = datetime.utcnow()
118
115
  source.validation.comment = comment
119
116
  source.validation.state = VALIDATION_REFUSED
@@ -123,18 +120,16 @@ def reject_source(ident, comment):
123
120
  return source
124
121
 
125
122
 
126
- def delete_source(ident):
123
+ def delete_source(source: HarvestSource):
127
124
  """Delete an harvest source"""
128
- source = get_source(ident)
129
125
  source.deleted = datetime.utcnow()
130
126
  source.save()
131
127
  signals.harvest_source_deleted.send(source)
132
128
  return source
133
129
 
134
130
 
135
- def clean_source(ident):
131
+ def clean_source(source: HarvestSource):
136
132
  """Deletes all datasets linked to a harvest source"""
137
- source = get_source(ident)
138
133
  datasets = Dataset.objects.filter(harvest__source_id=str(source.id))
139
134
  for dataset in datasets:
140
135
  dataset.deleted = datetime.utcnow()
@@ -180,22 +175,20 @@ def purge_jobs():
180
175
  return HarvestJob.objects(created__lt=expiration).delete()
181
176
 
182
177
 
183
- def run(ident):
178
+ def run(source: HarvestSource):
184
179
  """Launch or resume an harvesting for a given source if none is running"""
185
- source = get_source(ident)
186
180
  cls = backends.get(current_app, source.backend)
187
181
  backend = cls(source)
188
182
  backend.harvest()
189
183
 
190
184
 
191
- def launch(ident):
185
+ def launch(source: HarvestSource):
192
186
  """Launch or resume an harvesting for a given source if none is running"""
193
- return harvest.delay(ident)
187
+ return harvest.delay(source.id)
194
188
 
195
189
 
196
- def preview(ident):
190
+ def preview(source: HarvestSource):
197
191
  """Preview an harvesting for a given source"""
198
- source = get_source(ident)
199
192
  cls = backends.get(current_app, source.backend)
200
193
  max_items = current_app.config["HARVEST_PREVIEW_MAX_ITEMS"]
201
194
  backend = cls(source, dryrun=True, max_items=max_items)
@@ -240,11 +233,15 @@ def preview_from_config(
240
233
 
241
234
 
242
235
  def schedule(
243
- ident, cron=None, minute="*", hour="*", day_of_week="*", day_of_month="*", month_of_year="*"
236
+ source: HarvestSource,
237
+ cron=None,
238
+ minute="*",
239
+ hour="*",
240
+ day_of_week="*",
241
+ day_of_month="*",
242
+ month_of_year="*",
244
243
  ):
245
244
  """Schedule an harvesting on a source given a crontab"""
246
- source = get_source(ident)
247
-
248
245
  if cron:
249
246
  minute, hour, day_of_month, month_of_year, day_of_week = cron.split()
250
247
 
@@ -273,9 +270,8 @@ def schedule(
273
270
  return source
274
271
 
275
272
 
276
- def unschedule(ident):
273
+ def unschedule(source: HarvestSource):
277
274
  """Unschedule an harvesting on a source"""
278
- source = get_source(ident)
279
275
  if not source.periodic_task:
280
276
  msg = "Harvesting on source {0} is ot scheduled".format(source.name)
281
277
  raise ValueError(msg)