udata 10.8.1.dev36703__py2.py3-none-any.whl → 10.8.2__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of udata might be problematic. Click here for more details.
- udata/__init__.py +1 -1
- udata/app.py +0 -2
- udata/commands/db.py +22 -9
- udata/core/dataset/models.py +5 -3
- udata/core/discussions/api.py +2 -2
- udata/core/jobs/api.py +3 -3
- udata/core/metrics/helpers.py +10 -0
- udata/core/metrics/tasks.py +144 -1
- udata/core/organization/api.py +2 -2
- udata/core/post/api.py +1 -1
- udata/core/user/api.py +1 -1
- udata/features/identicon/api.py +1 -1
- udata/harvest/actions.py +24 -28
- udata/harvest/api.py +28 -36
- udata/harvest/backends/ckan/__init__.py +3 -0
- udata/harvest/backends/ckan/harvesters.py +274 -0
- udata/harvest/backends/ckan/schemas/__init__.py +0 -0
- udata/harvest/backends/ckan/schemas/ckan.py +86 -0
- udata/harvest/backends/ckan/schemas/dkan.py +98 -0
- udata/harvest/commands.py +7 -7
- udata/harvest/tasks.py +1 -1
- udata/harvest/tests/ckan/conftest.py +67 -0
- udata/harvest/tests/ckan/data/dkan-french-w-license.json +226 -0
- udata/harvest/tests/ckan/test_ckan_backend.py +697 -0
- udata/harvest/tests/ckan/test_ckan_backend_errors.py +140 -0
- udata/harvest/tests/ckan/test_ckan_backend_filters.py +130 -0
- udata/harvest/tests/ckan/test_dkan_backend.py +68 -0
- udata/harvest/tests/test_actions.py +27 -32
- udata/harvest/tests/test_api.py +23 -18
- udata/harvest/tests/test_dcat_backend.py +29 -29
- udata/migrations/2025-07-30-purge-old-harvest-dynamic-fields.py +29 -0
- udata/mongo/slug_fields.py +1 -1
- udata/routing.py +6 -0
- udata/static/chunks/{11.b6f741fcc366abfad9c4.js → 11.51d706fb9521c16976bc.js} +3 -3
- udata/static/chunks/{11.b6f741fcc366abfad9c4.js.map → 11.51d706fb9521c16976bc.js.map} +1 -1
- udata/static/chunks/{13.2d06442dd9a05d9777b5.js → 13.39e106d56f794ebd06a0.js} +2 -2
- udata/static/chunks/{13.2d06442dd9a05d9777b5.js.map → 13.39e106d56f794ebd06a0.js.map} +1 -1
- udata/static/chunks/{17.e8e4caaad5cb0cc0bacc.js → 17.70cbb4a91b002338007e.js} +2 -2
- udata/static/chunks/{17.e8e4caaad5cb0cc0bacc.js.map → 17.70cbb4a91b002338007e.js.map} +1 -1
- udata/static/chunks/{19.f03a102365af4315f9db.js → 19.a348a5fff8fe2801e52a.js} +3 -3
- udata/static/chunks/{19.f03a102365af4315f9db.js.map → 19.a348a5fff8fe2801e52a.js.map} +1 -1
- udata/static/chunks/{5.0fa1408dae4e76b87b2e.js → 5.343ca020a2d38cec1a14.js} +3 -3
- udata/static/chunks/{5.0fa1408dae4e76b87b2e.js.map → 5.343ca020a2d38cec1a14.js.map} +1 -1
- udata/static/chunks/{6.d663709d877baa44a71e.js → 6.a3b07de9dd2ca2d24e85.js} +3 -3
- udata/static/chunks/{6.d663709d877baa44a71e.js.map → 6.a3b07de9dd2ca2d24e85.js.map} +1 -1
- udata/static/chunks/{8.778091d55cd8ea39af6b.js → 8.462bb3029de008497675.js} +2 -2
- udata/static/chunks/{8.778091d55cd8ea39af6b.js.map → 8.462bb3029de008497675.js.map} +1 -1
- udata/static/common.js +1 -1
- udata/static/common.js.map +1 -1
- udata/tests/api/test_datasets_api.py +0 -46
- udata/tests/api/test_organizations_api.py +5 -0
- udata/tests/cli/test_db_cli.py +12 -0
- udata/tests/dataset/test_dataset_model.py +0 -16
- udata/tests/metrics/__init__.py +0 -0
- udata/tests/metrics/conftest.py +15 -0
- udata/tests/metrics/helpers.py +58 -0
- udata/tests/metrics/test_metrics.py +67 -0
- udata/tests/metrics/test_tasks.py +171 -0
- udata/translations/ar/LC_MESSAGES/udata.mo +0 -0
- udata/translations/ar/LC_MESSAGES/udata.po +72 -65
- udata/translations/de/LC_MESSAGES/udata.mo +0 -0
- udata/translations/de/LC_MESSAGES/udata.po +72 -65
- udata/translations/es/LC_MESSAGES/udata.mo +0 -0
- udata/translations/es/LC_MESSAGES/udata.po +72 -65
- udata/translations/fr/LC_MESSAGES/udata.mo +0 -0
- udata/translations/fr/LC_MESSAGES/udata.po +72 -65
- udata/translations/it/LC_MESSAGES/udata.mo +0 -0
- udata/translations/it/LC_MESSAGES/udata.po +72 -65
- udata/translations/pt/LC_MESSAGES/udata.mo +0 -0
- udata/translations/pt/LC_MESSAGES/udata.po +72 -65
- udata/translations/sr/LC_MESSAGES/udata.mo +0 -0
- udata/translations/sr/LC_MESSAGES/udata.po +72 -65
- udata/translations/udata.pot +74 -70
- {udata-10.8.1.dev36703.dist-info → udata-10.8.2.dist-info}/METADATA +15 -2
- {udata-10.8.1.dev36703.dist-info → udata-10.8.2.dist-info}/RECORD +79 -62
- {udata-10.8.1.dev36703.dist-info → udata-10.8.2.dist-info}/entry_points.txt +2 -0
- {udata-10.8.1.dev36703.dist-info → udata-10.8.2.dist-info}/LICENSE +0 -0
- {udata-10.8.1.dev36703.dist-info → udata-10.8.2.dist-info}/WHEEL +0 -0
- {udata-10.8.1.dev36703.dist-info → udata-10.8.2.dist-info}/top_level.txt +0 -0
udata/__init__.py
CHANGED
udata/app.py
CHANGED
|
@@ -210,7 +210,6 @@ def register_extensions(app):
|
|
|
210
210
|
routing,
|
|
211
211
|
search,
|
|
212
212
|
sentry,
|
|
213
|
-
sitemap,
|
|
214
213
|
tasks,
|
|
215
214
|
)
|
|
216
215
|
|
|
@@ -225,7 +224,6 @@ def register_extensions(app):
|
|
|
225
224
|
csrf.init_app(app)
|
|
226
225
|
mail.init_app(app)
|
|
227
226
|
search.init_app(app)
|
|
228
|
-
sitemap.init_app(app)
|
|
229
227
|
sentry.init_app(app)
|
|
230
228
|
return app
|
|
231
229
|
|
udata/commands/db.py
CHANGED
|
@@ -2,6 +2,7 @@ import collections
|
|
|
2
2
|
import copy
|
|
3
3
|
import logging
|
|
4
4
|
import os
|
|
5
|
+
import sys
|
|
5
6
|
import traceback
|
|
6
7
|
from itertools import groupby
|
|
7
8
|
from typing import Optional
|
|
@@ -312,15 +313,26 @@ def check_references(models_to_check):
|
|
|
312
313
|
f"\t{model.__name__}#{obj.id} have a broken reference for `{reference['name']}`"
|
|
313
314
|
)
|
|
314
315
|
elif reference["type"] == "list":
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
316
|
+
field_exists = (
|
|
317
|
+
f"{reference['name']}__exists" # Eg: "contact_points__exists"
|
|
318
|
+
)
|
|
319
|
+
if model.objects(id=obj.id, **{field_exists: True}).count() == 0:
|
|
320
|
+
# See https://github.com/MongoEngine/mongoengine/issues/267#issuecomment-283065318
|
|
321
|
+
# Setting it explicitly to an empty list actually removes the field; it shouldn't.
|
|
322
|
+
errors[model][key] += 1
|
|
323
|
+
print_and_save(
|
|
324
|
+
f"\t{model.__name__}#{obj.id} have a non existing field `{reference['name']}`, instead of an empty list"
|
|
325
|
+
)
|
|
326
|
+
else:
|
|
327
|
+
attr_list = getattr(obj, reference["name"])
|
|
328
|
+
for i, sub in enumerate(attr_list):
|
|
329
|
+
# If it's still an instance of DBRef it means that it failed to
|
|
330
|
+
# dereference the ID.
|
|
331
|
+
if isinstance(sub, DBRef):
|
|
332
|
+
errors[model][key] += 1
|
|
333
|
+
print_and_save(
|
|
334
|
+
f"\t{model.__name__}#{obj.id} have a broken reference for {reference['name']}[{i}]"
|
|
335
|
+
)
|
|
324
336
|
elif reference["type"] == "embed_list":
|
|
325
337
|
p1, p2 = reference["name"].split("__")
|
|
326
338
|
attr_list = getattr(obj, p1, [])
|
|
@@ -380,6 +392,7 @@ def check_references(models_to_check):
|
|
|
380
392
|
sentry_sdk.capture_message(f"{total} integrity errors", "fatal")
|
|
381
393
|
except ImportError:
|
|
382
394
|
print("`sentry_sdk` not installed. The errors weren't reported")
|
|
395
|
+
sys.exit(1)
|
|
383
396
|
|
|
384
397
|
|
|
385
398
|
@grp.command()
|
udata/core/dataset/models.py
CHANGED
|
@@ -10,7 +10,6 @@ import requests
|
|
|
10
10
|
from blinker import signal
|
|
11
11
|
from dateutil.parser import parse as parse_dt
|
|
12
12
|
from flask import current_app, url_for
|
|
13
|
-
from mongoengine import DynamicEmbeddedDocument
|
|
14
13
|
from mongoengine import ValidationError as MongoEngineValidationError
|
|
15
14
|
from mongoengine.fields import DateTimeField
|
|
16
15
|
from mongoengine.signals import post_save, pre_init, pre_save
|
|
@@ -78,7 +77,7 @@ def get_json_ld_extra(key, value):
|
|
|
78
77
|
}
|
|
79
78
|
|
|
80
79
|
|
|
81
|
-
class HarvestDatasetMetadata(DynamicEmbeddedDocument):
|
|
80
|
+
class HarvestDatasetMetadata(db.EmbeddedDocument):
|
|
82
81
|
backend = db.StringField()
|
|
83
82
|
created_at = db.DateTimeField()
|
|
84
83
|
modified_at = db.DateTimeField()
|
|
@@ -91,12 +90,15 @@ class HarvestDatasetMetadata(DynamicEmbeddedDocument):
|
|
|
91
90
|
dct_identifier = db.StringField()
|
|
92
91
|
archived_at = db.DateTimeField()
|
|
93
92
|
archived = db.StringField()
|
|
93
|
+
ckan_name = db.StringField()
|
|
94
|
+
ckan_source = db.StringField()
|
|
94
95
|
|
|
95
96
|
|
|
96
|
-
class HarvestResourceMetadata(DynamicEmbeddedDocument):
|
|
97
|
+
class HarvestResourceMetadata(db.EmbeddedDocument):
|
|
97
98
|
created_at = db.DateTimeField()
|
|
98
99
|
modified_at = db.DateTimeField()
|
|
99
100
|
uri = db.StringField()
|
|
101
|
+
dct_identifier = db.StringField()
|
|
100
102
|
|
|
101
103
|
|
|
102
104
|
class Schema(db.EmbeddedDocument):
|
udata/core/discussions/api.py
CHANGED
|
@@ -247,7 +247,7 @@ class DiscussionAPI(API):
|
|
|
247
247
|
return "", 204
|
|
248
248
|
|
|
249
249
|
|
|
250
|
-
@ns.route("/<id>/comments/<int:cidx>/spam", endpoint="discussion_comment_spam")
|
|
250
|
+
@ns.route("/<id>/comments/<int:cidx>/spam/", endpoint="discussion_comment_spam")
|
|
251
251
|
@ns.doc(delete={"id": "unspam"})
|
|
252
252
|
class DiscussionCommentSpamAPI(SpamAPIMixin):
|
|
253
253
|
def get_model(self, id, cidx):
|
|
@@ -259,7 +259,7 @@ class DiscussionCommentSpamAPI(SpamAPIMixin):
|
|
|
259
259
|
return discussion, discussion.discussion[cidx]
|
|
260
260
|
|
|
261
261
|
|
|
262
|
-
@ns.route("/<id>/comments/<int:cidx>", endpoint="discussion_comment")
|
|
262
|
+
@ns.route("/<id>/comments/<int:cidx>/", endpoint="discussion_comment")
|
|
263
263
|
class DiscussionCommentAPI(API):
|
|
264
264
|
"""
|
|
265
265
|
Base class for a comment in a discussion thread.
|
udata/core/jobs/api.py
CHANGED
|
@@ -101,7 +101,7 @@ class JobsAPI(API):
|
|
|
101
101
|
return form.save(), 201
|
|
102
102
|
|
|
103
103
|
|
|
104
|
-
@ns.route("/jobs/<string:id>", endpoint="job")
|
|
104
|
+
@ns.route("/jobs/<string:id>/", endpoint="job")
|
|
105
105
|
@api.param("id", "A job ID")
|
|
106
106
|
class JobAPI(API):
|
|
107
107
|
def get_or_404(self, id):
|
|
@@ -139,7 +139,7 @@ class JobAPI(API):
|
|
|
139
139
|
return "", 204
|
|
140
140
|
|
|
141
141
|
|
|
142
|
-
@ns.route("/tasks/<string:id>", endpoint="task")
|
|
142
|
+
@ns.route("/tasks/<string:id>/", endpoint="task")
|
|
143
143
|
class TaskAPI(API):
|
|
144
144
|
@api.marshal_with(task_fields)
|
|
145
145
|
def get(self, id):
|
|
@@ -159,7 +159,7 @@ class TaskAPI(API):
|
|
|
159
159
|
return data
|
|
160
160
|
|
|
161
161
|
|
|
162
|
-
@ns.route("/jobs/schedulables", endpoint="schedulable_jobs")
|
|
162
|
+
@ns.route("/jobs/schedulables/", endpoint="schedulable_jobs")
|
|
163
163
|
class JobsReferenceAPI(API):
|
|
164
164
|
@api.doc(model=[str])
|
|
165
165
|
def get(self):
|
udata/core/metrics/helpers.py
CHANGED
|
@@ -2,6 +2,7 @@ import logging
|
|
|
2
2
|
from collections import OrderedDict
|
|
3
3
|
from datetime import datetime, timedelta
|
|
4
4
|
from typing import Dict, List, Union
|
|
5
|
+
from urllib.parse import urlencode
|
|
5
6
|
|
|
6
7
|
import requests
|
|
7
8
|
from bson import ObjectId
|
|
@@ -68,6 +69,15 @@ def get_metrics_for_model(
|
|
|
68
69
|
return [{} for _ in range(len(metrics_labels))]
|
|
69
70
|
|
|
70
71
|
|
|
72
|
+
def get_download_url(model: str, id: Union[str, ObjectId, None]) -> str:
|
|
73
|
+
api_namespace = model + "s" if model != "site" else model
|
|
74
|
+
base_url = f"{current_app.config['METRICS_API']}/{api_namespace}/data/csv/"
|
|
75
|
+
args = {"metric_month__sort": "asc"}
|
|
76
|
+
if id:
|
|
77
|
+
args[f"{model}_id__exact"] = id
|
|
78
|
+
return f"{base_url}?{urlencode(args)}"
|
|
79
|
+
|
|
80
|
+
|
|
71
81
|
def compute_monthly_aggregated_metrics(aggregation_res: CommandCursor) -> OrderedDict:
|
|
72
82
|
monthly_metrics = OrderedDict((month, 0) for month in get_last_13_months())
|
|
73
83
|
for monthly_count in aggregation_res:
|
udata/core/metrics/tasks.py
CHANGED
|
@@ -1,9 +1,152 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import time
|
|
3
|
+
from functools import wraps
|
|
4
|
+
from typing import Dict, List
|
|
5
|
+
|
|
6
|
+
import requests
|
|
1
7
|
from flask import current_app
|
|
2
8
|
|
|
9
|
+
from udata.core.dataservices.models import Dataservice
|
|
3
10
|
from udata.core.metrics.signals import on_site_metrics_computed
|
|
4
|
-
from udata.models import Site
|
|
11
|
+
from udata.models import CommunityResource, Dataset, Organization, Reuse, Site, db
|
|
5
12
|
from udata.tasks import job
|
|
6
13
|
|
|
14
|
+
log = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def log_timing(func):
|
|
18
|
+
@wraps(func)
|
|
19
|
+
def timeit_wrapper(*args, **kwargs):
|
|
20
|
+
# Better log if we're using Python 3.9
|
|
21
|
+
name = func.__name__
|
|
22
|
+
model = name.removeprefix("update_") if hasattr(name, "removeprefix") else name
|
|
23
|
+
|
|
24
|
+
log.info(f"Processing {model}…")
|
|
25
|
+
start_time = time.perf_counter()
|
|
26
|
+
result = func(*args, **kwargs)
|
|
27
|
+
total_time = time.perf_counter() - start_time
|
|
28
|
+
log.info(f"Done in {total_time:.4f} seconds.")
|
|
29
|
+
return result
|
|
30
|
+
|
|
31
|
+
return timeit_wrapper
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def save_model(model: db.Document, model_id: str, metrics: Dict[str, int]) -> None:
|
|
35
|
+
try:
|
|
36
|
+
result = model.objects(id=model_id).update(
|
|
37
|
+
**{f"set__metrics__{key}": value for key, value in metrics.items()}
|
|
38
|
+
)
|
|
39
|
+
|
|
40
|
+
if result is None:
|
|
41
|
+
log.debug(f"{model.__name__} not found", extra={"id": model_id})
|
|
42
|
+
except Exception as e:
|
|
43
|
+
log.exception(e)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def iterate_on_metrics(target: str, value_keys: List[str], page_size: int = 50) -> dict:
|
|
47
|
+
"""
|
|
48
|
+
Yield all elements with not zero values for the keys inside `value_keys`.
|
|
49
|
+
If you pass ['visit', 'download_resource'], it will do a `OR` and get
|
|
50
|
+
metrics with one of the two values not zero.
|
|
51
|
+
"""
|
|
52
|
+
yielded = set()
|
|
53
|
+
|
|
54
|
+
for value_key in value_keys:
|
|
55
|
+
url = f"{current_app.config['METRICS_API']}/{target}_total/data/"
|
|
56
|
+
url += f"?{value_key}__greater=1&page_size={page_size}"
|
|
57
|
+
|
|
58
|
+
with requests.Session() as session:
|
|
59
|
+
while url is not None:
|
|
60
|
+
r = session.get(url, timeout=10)
|
|
61
|
+
r.raise_for_status()
|
|
62
|
+
data = r.json()
|
|
63
|
+
|
|
64
|
+
for row in data["data"]:
|
|
65
|
+
if row["__id"] not in yielded:
|
|
66
|
+
yielded.add(row["__id"])
|
|
67
|
+
yield row
|
|
68
|
+
|
|
69
|
+
url = data["links"].get("next")
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
@log_timing
|
|
73
|
+
def update_resources_and_community_resources():
|
|
74
|
+
for data in iterate_on_metrics("resources", ["download_resource"]):
|
|
75
|
+
if data["dataset_id"] is None:
|
|
76
|
+
save_model(
|
|
77
|
+
CommunityResource,
|
|
78
|
+
data["resource_id"],
|
|
79
|
+
{
|
|
80
|
+
"views": data["download_resource"],
|
|
81
|
+
},
|
|
82
|
+
)
|
|
83
|
+
else:
|
|
84
|
+
Dataset.objects(resources__id=data["resource_id"]).update(
|
|
85
|
+
**{"set__resources__$__metrics__views": data["download_resource"]}
|
|
86
|
+
)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
@log_timing
|
|
90
|
+
def update_datasets():
|
|
91
|
+
for data in iterate_on_metrics("datasets", ["visit", "download_resource"]):
|
|
92
|
+
save_model(
|
|
93
|
+
Dataset,
|
|
94
|
+
data["dataset_id"],
|
|
95
|
+
{
|
|
96
|
+
"views": data["visit"],
|
|
97
|
+
"resources_downloads": data["download_resource"],
|
|
98
|
+
},
|
|
99
|
+
)
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
@log_timing
|
|
103
|
+
def update_dataservices():
|
|
104
|
+
for data in iterate_on_metrics("dataservices", ["visit"]):
|
|
105
|
+
save_model(
|
|
106
|
+
Dataservice,
|
|
107
|
+
data["dataservice_id"],
|
|
108
|
+
{
|
|
109
|
+
"views": data["visit"],
|
|
110
|
+
},
|
|
111
|
+
)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
@log_timing
|
|
115
|
+
def update_reuses():
|
|
116
|
+
for data in iterate_on_metrics("reuses", ["visit"]):
|
|
117
|
+
save_model(Reuse, data["reuse_id"], {"views": data["visit"]})
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
@log_timing
|
|
121
|
+
def update_organizations():
|
|
122
|
+
# We're currently using visit_dataset as global metric for an orga
|
|
123
|
+
for data in iterate_on_metrics("organizations", ["visit_dataset"]):
|
|
124
|
+
save_model(
|
|
125
|
+
Organization,
|
|
126
|
+
data["organization_id"],
|
|
127
|
+
{
|
|
128
|
+
"views": data["visit_dataset"],
|
|
129
|
+
},
|
|
130
|
+
)
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def update_metrics_for_models():
|
|
134
|
+
log.info("Starting…")
|
|
135
|
+
update_datasets()
|
|
136
|
+
update_resources_and_community_resources()
|
|
137
|
+
update_dataservices()
|
|
138
|
+
update_reuses()
|
|
139
|
+
update_organizations()
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
@job("update-metrics", route="low.metrics")
|
|
143
|
+
def update_metrics(self):
|
|
144
|
+
"""Update udata objects metrics"""
|
|
145
|
+
if not current_app.config["METRICS_API"]:
|
|
146
|
+
log.error("You need to set METRICS_API to run update-metrics")
|
|
147
|
+
exit(1)
|
|
148
|
+
update_metrics_for_models()
|
|
149
|
+
|
|
7
150
|
|
|
8
151
|
@job("compute-site-metrics")
|
|
9
152
|
def compute_site_metrics(self):
|
udata/core/organization/api.py
CHANGED
|
@@ -99,7 +99,7 @@ class OrgApiParser(ModelApiParser):
|
|
|
99
99
|
if args.get("badge"):
|
|
100
100
|
organizations = organizations.with_badge(args["badge"])
|
|
101
101
|
if args.get("name"):
|
|
102
|
-
organizations = organizations.filter(
|
|
102
|
+
organizations = organizations.filter(name__iexact=args["name"])
|
|
103
103
|
if args.get("business_number_id"):
|
|
104
104
|
organizations = organizations.filter(business_number_id=args["business_number_id"])
|
|
105
105
|
return organizations
|
|
@@ -449,7 +449,7 @@ class MembershipRefuseAPI(MembershipAPI):
|
|
|
449
449
|
return {}, 200
|
|
450
450
|
|
|
451
451
|
|
|
452
|
-
@ns.route("/<org:org>/member/<user:user>", endpoint="member", doc=common_doc)
|
|
452
|
+
@ns.route("/<org:org>/member/<user:user>/", endpoint="member", doc=common_doc)
|
|
453
453
|
class MemberAPI(API):
|
|
454
454
|
@api.secure
|
|
455
455
|
@api.expect(member_fields)
|
udata/core/post/api.py
CHANGED
|
@@ -168,7 +168,7 @@ class PostAPI(API):
|
|
|
168
168
|
return "", 204
|
|
169
169
|
|
|
170
170
|
|
|
171
|
-
@ns.route("/<post:post>/publish", endpoint="publish_post")
|
|
171
|
+
@ns.route("/<post:post>/publish/", endpoint="publish_post")
|
|
172
172
|
class PublishPostAPI(API):
|
|
173
173
|
@api.secure(admin_permission)
|
|
174
174
|
@api.doc("publish_post")
|
udata/core/user/api.py
CHANGED
udata/features/identicon/api.py
CHANGED
|
@@ -5,7 +5,7 @@ from . import backends
|
|
|
5
5
|
ns = api.namespace("avatars", "Avatars")
|
|
6
6
|
|
|
7
7
|
|
|
8
|
-
@ns.route("/<identifier>/<int:size>", endpoint="avatar")
|
|
8
|
+
@ns.route("/<identifier>/<int:size>/", endpoint="avatar")
|
|
9
9
|
class IdenticonAPI(API):
|
|
10
10
|
@api.doc("avatars")
|
|
11
11
|
def get(self, identifier, size):
|
udata/harvest/actions.py
CHANGED
|
@@ -29,6 +29,11 @@ log = logging.getLogger(__name__)
|
|
|
29
29
|
DEFAULT_PAGE_SIZE = 10
|
|
30
30
|
|
|
31
31
|
|
|
32
|
+
def get_source(ident):
|
|
33
|
+
"""Get an harvest source given its ID or its slug"""
|
|
34
|
+
return HarvestSource.get(ident)
|
|
35
|
+
|
|
36
|
+
|
|
32
37
|
def list_backends():
|
|
33
38
|
"""List all available backends"""
|
|
34
39
|
return backends.get_all(current_app).values()
|
|
@@ -44,11 +49,6 @@ def list_sources(owner=None, deleted=False):
|
|
|
44
49
|
return list(sources)
|
|
45
50
|
|
|
46
51
|
|
|
47
|
-
def get_source(ident):
|
|
48
|
-
"""Get an harvest source given its ID or its slug"""
|
|
49
|
-
return HarvestSource.get(ident)
|
|
50
|
-
|
|
51
|
-
|
|
52
52
|
def get_job(ident):
|
|
53
53
|
"""Get an harvest job given its ID"""
|
|
54
54
|
return HarvestJob.objects.get(id=ident)
|
|
@@ -89,31 +89,28 @@ def create_source(
|
|
|
89
89
|
return source
|
|
90
90
|
|
|
91
91
|
|
|
92
|
-
def update_source(ident, data):
|
|
92
|
+
def update_source(source: HarvestSource, data):
|
|
93
93
|
"""Update an harvest source"""
|
|
94
|
-
source = get_source(ident)
|
|
95
94
|
source.modify(**data)
|
|
96
95
|
signals.harvest_source_updated.send(source)
|
|
97
96
|
return source
|
|
98
97
|
|
|
99
98
|
|
|
100
|
-
def validate_source(ident, comment=None):
|
|
99
|
+
def validate_source(source: HarvestSource, comment=None):
|
|
101
100
|
"""Validate a source for automatic harvesting"""
|
|
102
|
-
source = get_source(ident)
|
|
103
101
|
source.validation.on = datetime.utcnow()
|
|
104
102
|
source.validation.comment = comment
|
|
105
103
|
source.validation.state = VALIDATION_ACCEPTED
|
|
106
104
|
if current_user.is_authenticated:
|
|
107
105
|
source.validation.by = current_user._get_current_object()
|
|
108
106
|
source.save()
|
|
109
|
-
schedule(
|
|
110
|
-
launch(
|
|
107
|
+
schedule(source, cron=current_app.config["HARVEST_DEFAULT_SCHEDULE"])
|
|
108
|
+
launch(source)
|
|
111
109
|
return source
|
|
112
110
|
|
|
113
111
|
|
|
114
|
-
def reject_source(ident, comment):
|
|
112
|
+
def reject_source(source: HarvestSource, comment):
|
|
115
113
|
"""Reject a source for automatic harvesting"""
|
|
116
|
-
source = get_source(ident)
|
|
117
114
|
source.validation.on = datetime.utcnow()
|
|
118
115
|
source.validation.comment = comment
|
|
119
116
|
source.validation.state = VALIDATION_REFUSED
|
|
@@ -123,18 +120,16 @@ def reject_source(ident, comment):
|
|
|
123
120
|
return source
|
|
124
121
|
|
|
125
122
|
|
|
126
|
-
def delete_source(ident):
|
|
123
|
+
def delete_source(source: HarvestSource):
|
|
127
124
|
"""Delete an harvest source"""
|
|
128
|
-
source = get_source(ident)
|
|
129
125
|
source.deleted = datetime.utcnow()
|
|
130
126
|
source.save()
|
|
131
127
|
signals.harvest_source_deleted.send(source)
|
|
132
128
|
return source
|
|
133
129
|
|
|
134
130
|
|
|
135
|
-
def clean_source(ident):
|
|
131
|
+
def clean_source(source: HarvestSource):
|
|
136
132
|
"""Deletes all datasets linked to a harvest source"""
|
|
137
|
-
source = get_source(ident)
|
|
138
133
|
datasets = Dataset.objects.filter(harvest__source_id=str(source.id))
|
|
139
134
|
for dataset in datasets:
|
|
140
135
|
dataset.deleted = datetime.utcnow()
|
|
@@ -180,22 +175,20 @@ def purge_jobs():
|
|
|
180
175
|
return HarvestJob.objects(created__lt=expiration).delete()
|
|
181
176
|
|
|
182
177
|
|
|
183
|
-
def run(ident):
|
|
178
|
+
def run(source: HarvestSource):
|
|
184
179
|
"""Launch or resume an harvesting for a given source if none is running"""
|
|
185
|
-
source = get_source(ident)
|
|
186
180
|
cls = backends.get(current_app, source.backend)
|
|
187
181
|
backend = cls(source)
|
|
188
182
|
backend.harvest()
|
|
189
183
|
|
|
190
184
|
|
|
191
|
-
def launch(
|
|
185
|
+
def launch(source: HarvestSource):
|
|
192
186
|
"""Launch or resume an harvesting for a given source if none is running"""
|
|
193
|
-
return harvest.delay(
|
|
187
|
+
return harvest.delay(source.id)
|
|
194
188
|
|
|
195
189
|
|
|
196
|
-
def preview(ident):
|
|
190
|
+
def preview(source: HarvestSource):
|
|
197
191
|
"""Preview an harvesting for a given source"""
|
|
198
|
-
source = get_source(ident)
|
|
199
192
|
cls = backends.get(current_app, source.backend)
|
|
200
193
|
max_items = current_app.config["HARVEST_PREVIEW_MAX_ITEMS"]
|
|
201
194
|
backend = cls(source, dryrun=True, max_items=max_items)
|
|
@@ -240,11 +233,15 @@ def preview_from_config(
|
|
|
240
233
|
|
|
241
234
|
|
|
242
235
|
def schedule(
|
|
243
|
-
|
|
236
|
+
source: HarvestSource,
|
|
237
|
+
cron=None,
|
|
238
|
+
minute="*",
|
|
239
|
+
hour="*",
|
|
240
|
+
day_of_week="*",
|
|
241
|
+
day_of_month="*",
|
|
242
|
+
month_of_year="*",
|
|
244
243
|
):
|
|
245
244
|
"""Schedule an harvesting on a source given a crontab"""
|
|
246
|
-
source = get_source(ident)
|
|
247
|
-
|
|
248
245
|
if cron:
|
|
249
246
|
minute, hour, day_of_month, month_of_year, day_of_week = cron.split()
|
|
250
247
|
|
|
@@ -273,9 +270,8 @@ def schedule(
|
|
|
273
270
|
return source
|
|
274
271
|
|
|
275
272
|
|
|
276
|
-
def unschedule(ident):
|
|
273
|
+
def unschedule(source: HarvestSource):
|
|
277
274
|
"""Unschedule an harvesting on a source"""
|
|
278
|
-
source = get_source(ident)
|
|
279
275
|
if not source.periodic_task:
|
|
280
276
|
msg = "Harvesting on source {0} is ot scheduled".format(source.name)
|
|
281
277
|
raise ValueError(msg)
|