udata 9.1.2.dev30355__py2.py3-none-any.whl → 9.1.2.dev30454__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of udata might be problematic. Click here for more details.
- tasks/__init__.py +109 -107
- tasks/helpers.py +18 -18
- udata/__init__.py +4 -4
- udata/admin/views.py +5 -5
- udata/api/__init__.py +111 -134
- udata/api/commands.py +45 -37
- udata/api/errors.py +5 -4
- udata/api/fields.py +23 -21
- udata/api/oauth2.py +55 -74
- udata/api/parsers.py +15 -15
- udata/api/signals.py +1 -1
- udata/api_fields.py +137 -89
- udata/app.py +58 -55
- udata/assets.py +5 -5
- udata/auth/__init__.py +37 -26
- udata/auth/forms.py +23 -15
- udata/auth/helpers.py +1 -1
- udata/auth/mails.py +3 -3
- udata/auth/password_validation.py +19 -15
- udata/auth/views.py +94 -68
- udata/commands/__init__.py +71 -69
- udata/commands/cache.py +7 -7
- udata/commands/db.py +201 -140
- udata/commands/dcat.py +36 -30
- udata/commands/fixtures.py +100 -84
- udata/commands/images.py +21 -20
- udata/commands/info.py +17 -20
- udata/commands/init.py +10 -10
- udata/commands/purge.py +12 -13
- udata/commands/serve.py +41 -29
- udata/commands/static.py +16 -18
- udata/commands/test.py +20 -20
- udata/commands/tests/fixtures.py +26 -24
- udata/commands/worker.py +31 -33
- udata/core/__init__.py +12 -12
- udata/core/activity/__init__.py +0 -1
- udata/core/activity/api.py +59 -49
- udata/core/activity/models.py +28 -26
- udata/core/activity/signals.py +1 -1
- udata/core/activity/tasks.py +16 -10
- udata/core/badges/api.py +6 -6
- udata/core/badges/commands.py +14 -13
- udata/core/badges/fields.py +8 -5
- udata/core/badges/forms.py +7 -4
- udata/core/badges/models.py +16 -31
- udata/core/badges/permissions.py +1 -3
- udata/core/badges/signals.py +2 -2
- udata/core/badges/tasks.py +3 -2
- udata/core/badges/tests/test_commands.py +10 -10
- udata/core/badges/tests/test_model.py +24 -31
- udata/core/contact_point/api.py +19 -18
- udata/core/contact_point/api_fields.py +21 -14
- udata/core/contact_point/factories.py +2 -2
- udata/core/contact_point/forms.py +7 -6
- udata/core/contact_point/models.py +3 -5
- udata/core/dataservices/api.py +26 -21
- udata/core/dataservices/factories.py +13 -11
- udata/core/dataservices/models.py +35 -40
- udata/core/dataservices/permissions.py +4 -4
- udata/core/dataservices/rdf.py +40 -17
- udata/core/dataservices/tasks.py +4 -3
- udata/core/dataset/actions.py +10 -10
- udata/core/dataset/activities.py +21 -23
- udata/core/dataset/api.py +321 -298
- udata/core/dataset/api_fields.py +443 -271
- udata/core/dataset/apiv2.py +305 -229
- udata/core/dataset/commands.py +38 -36
- udata/core/dataset/constants.py +61 -54
- udata/core/dataset/csv.py +70 -74
- udata/core/dataset/events.py +39 -32
- udata/core/dataset/exceptions.py +8 -4
- udata/core/dataset/factories.py +57 -65
- udata/core/dataset/forms.py +87 -63
- udata/core/dataset/models.py +336 -280
- udata/core/dataset/permissions.py +9 -6
- udata/core/dataset/preview.py +15 -17
- udata/core/dataset/rdf.py +156 -122
- udata/core/dataset/search.py +92 -77
- udata/core/dataset/signals.py +1 -1
- udata/core/dataset/tasks.py +63 -54
- udata/core/discussions/actions.py +5 -5
- udata/core/discussions/api.py +124 -120
- udata/core/discussions/factories.py +2 -2
- udata/core/discussions/forms.py +9 -7
- udata/core/discussions/metrics.py +1 -3
- udata/core/discussions/models.py +25 -24
- udata/core/discussions/notifications.py +18 -14
- udata/core/discussions/permissions.py +3 -3
- udata/core/discussions/signals.py +4 -4
- udata/core/discussions/tasks.py +24 -28
- udata/core/followers/api.py +32 -33
- udata/core/followers/models.py +9 -9
- udata/core/followers/signals.py +3 -3
- udata/core/jobs/actions.py +7 -7
- udata/core/jobs/api.py +99 -92
- udata/core/jobs/commands.py +48 -49
- udata/core/jobs/forms.py +11 -11
- udata/core/jobs/models.py +6 -6
- udata/core/metrics/__init__.py +2 -2
- udata/core/metrics/commands.py +34 -30
- udata/core/metrics/models.py +2 -4
- udata/core/metrics/signals.py +1 -1
- udata/core/metrics/tasks.py +3 -3
- udata/core/organization/activities.py +12 -15
- udata/core/organization/api.py +167 -174
- udata/core/organization/api_fields.py +183 -124
- udata/core/organization/apiv2.py +32 -32
- udata/core/organization/commands.py +20 -22
- udata/core/organization/constants.py +11 -11
- udata/core/organization/csv.py +17 -15
- udata/core/organization/factories.py +8 -11
- udata/core/organization/forms.py +32 -26
- udata/core/organization/metrics.py +2 -1
- udata/core/organization/models.py +87 -67
- udata/core/organization/notifications.py +18 -14
- udata/core/organization/permissions.py +10 -11
- udata/core/organization/rdf.py +14 -14
- udata/core/organization/search.py +30 -28
- udata/core/organization/signals.py +7 -7
- udata/core/organization/tasks.py +42 -61
- udata/core/owned.py +38 -27
- udata/core/post/api.py +82 -81
- udata/core/post/constants.py +8 -5
- udata/core/post/factories.py +4 -4
- udata/core/post/forms.py +13 -14
- udata/core/post/models.py +20 -22
- udata/core/post/tests/test_api.py +30 -32
- udata/core/reports/api.py +8 -7
- udata/core/reports/constants.py +1 -3
- udata/core/reports/models.py +10 -10
- udata/core/reuse/activities.py +15 -19
- udata/core/reuse/api.py +123 -126
- udata/core/reuse/api_fields.py +120 -85
- udata/core/reuse/apiv2.py +11 -10
- udata/core/reuse/constants.py +23 -23
- udata/core/reuse/csv.py +18 -18
- udata/core/reuse/factories.py +5 -9
- udata/core/reuse/forms.py +24 -21
- udata/core/reuse/models.py +55 -51
- udata/core/reuse/permissions.py +2 -2
- udata/core/reuse/search.py +49 -46
- udata/core/reuse/signals.py +1 -1
- udata/core/reuse/tasks.py +4 -5
- udata/core/site/api.py +47 -50
- udata/core/site/factories.py +2 -2
- udata/core/site/forms.py +4 -5
- udata/core/site/models.py +94 -63
- udata/core/site/rdf.py +14 -14
- udata/core/spam/api.py +16 -9
- udata/core/spam/constants.py +4 -4
- udata/core/spam/fields.py +13 -7
- udata/core/spam/models.py +27 -20
- udata/core/spam/signals.py +1 -1
- udata/core/spam/tests/test_spam.py +6 -5
- udata/core/spatial/api.py +72 -80
- udata/core/spatial/api_fields.py +73 -58
- udata/core/spatial/commands.py +67 -64
- udata/core/spatial/constants.py +3 -3
- udata/core/spatial/factories.py +37 -54
- udata/core/spatial/forms.py +27 -26
- udata/core/spatial/geoids.py +17 -17
- udata/core/spatial/models.py +43 -47
- udata/core/spatial/tasks.py +2 -1
- udata/core/spatial/tests/test_api.py +115 -130
- udata/core/spatial/tests/test_fields.py +74 -77
- udata/core/spatial/tests/test_geoid.py +22 -22
- udata/core/spatial/tests/test_models.py +5 -7
- udata/core/spatial/translations.py +16 -16
- udata/core/storages/__init__.py +16 -18
- udata/core/storages/api.py +66 -64
- udata/core/storages/tasks.py +7 -7
- udata/core/storages/utils.py +15 -15
- udata/core/storages/views.py +5 -6
- udata/core/tags/api.py +17 -14
- udata/core/tags/csv.py +4 -4
- udata/core/tags/models.py +8 -5
- udata/core/tags/tasks.py +11 -13
- udata/core/tags/views.py +4 -4
- udata/core/topic/api.py +84 -73
- udata/core/topic/apiv2.py +157 -127
- udata/core/topic/factories.py +3 -4
- udata/core/topic/forms.py +12 -14
- udata/core/topic/models.py +14 -19
- udata/core/topic/parsers.py +26 -26
- udata/core/user/activities.py +30 -29
- udata/core/user/api.py +151 -152
- udata/core/user/api_fields.py +132 -100
- udata/core/user/apiv2.py +7 -7
- udata/core/user/commands.py +38 -38
- udata/core/user/factories.py +8 -9
- udata/core/user/forms.py +14 -11
- udata/core/user/metrics.py +2 -2
- udata/core/user/models.py +68 -69
- udata/core/user/permissions.py +4 -5
- udata/core/user/rdf.py +7 -8
- udata/core/user/tasks.py +2 -2
- udata/core/user/tests/test_user_model.py +24 -16
- udata/cors.py +99 -0
- udata/db/tasks.py +2 -1
- udata/entrypoints.py +35 -31
- udata/errors.py +2 -1
- udata/event/values.py +6 -6
- udata/factories.py +2 -2
- udata/features/identicon/api.py +5 -6
- udata/features/identicon/backends.py +48 -55
- udata/features/identicon/tests/test_backends.py +4 -5
- udata/features/notifications/__init__.py +0 -1
- udata/features/notifications/actions.py +9 -9
- udata/features/notifications/api.py +17 -13
- udata/features/territories/__init__.py +12 -10
- udata/features/territories/api.py +14 -15
- udata/features/territories/models.py +23 -28
- udata/features/transfer/actions.py +8 -11
- udata/features/transfer/api.py +84 -77
- udata/features/transfer/factories.py +2 -1
- udata/features/transfer/models.py +11 -12
- udata/features/transfer/notifications.py +19 -15
- udata/features/transfer/permissions.py +5 -5
- udata/forms/__init__.py +5 -2
- udata/forms/fields.py +164 -172
- udata/forms/validators.py +19 -22
- udata/forms/widgets.py +9 -13
- udata/frontend/__init__.py +31 -26
- udata/frontend/csv.py +68 -58
- udata/frontend/markdown.py +40 -44
- udata/harvest/actions.py +89 -77
- udata/harvest/api.py +294 -238
- udata/harvest/backends/__init__.py +4 -4
- udata/harvest/backends/base.py +128 -111
- udata/harvest/backends/dcat.py +80 -66
- udata/harvest/commands.py +56 -60
- udata/harvest/csv.py +8 -8
- udata/harvest/exceptions.py +6 -3
- udata/harvest/filters.py +24 -23
- udata/harvest/forms.py +27 -28
- udata/harvest/models.py +88 -80
- udata/harvest/notifications.py +15 -10
- udata/harvest/signals.py +13 -13
- udata/harvest/tasks.py +11 -10
- udata/harvest/tests/factories.py +23 -24
- udata/harvest/tests/test_actions.py +136 -166
- udata/harvest/tests/test_api.py +220 -214
- udata/harvest/tests/test_base_backend.py +117 -112
- udata/harvest/tests/test_dcat_backend.py +380 -308
- udata/harvest/tests/test_filters.py +33 -22
- udata/harvest/tests/test_models.py +11 -14
- udata/harvest/tests/test_notifications.py +6 -7
- udata/harvest/tests/test_tasks.py +7 -6
- udata/i18n.py +237 -78
- udata/linkchecker/backends.py +5 -11
- udata/linkchecker/checker.py +23 -22
- udata/linkchecker/commands.py +4 -6
- udata/linkchecker/models.py +6 -6
- udata/linkchecker/tasks.py +18 -20
- udata/mail.py +21 -21
- udata/migrations/2020-07-24-remove-s-from-scope-oauth.py +9 -8
- udata/migrations/2020-08-24-add-fs-filename.py +9 -8
- udata/migrations/2020-09-28-update-reuses-datasets-metrics.py +5 -4
- udata/migrations/2020-10-16-migrate-ods-resources.py +9 -10
- udata/migrations/2021-04-08-update-schema-with-new-structure.py +8 -7
- udata/migrations/2021-05-27-fix-default-schema-name.py +7 -6
- udata/migrations/2021-07-05-remove-unused-badges.py +17 -15
- udata/migrations/2021-07-07-update-schema-for-community-resources.py +7 -6
- udata/migrations/2021-08-17-follow-integrity.py +5 -4
- udata/migrations/2021-08-17-harvest-integrity.py +13 -12
- udata/migrations/2021-08-17-oauth2client-integrity.py +5 -4
- udata/migrations/2021-08-17-transfer-integrity.py +5 -4
- udata/migrations/2021-08-17-users-integrity.py +9 -8
- udata/migrations/2021-12-14-reuse-topics.py +7 -6
- udata/migrations/2022-04-21-improve-extension-detection.py +8 -7
- udata/migrations/2022-09-22-clean-inactive-harvest-datasets.py +16 -14
- udata/migrations/2022-10-10-add-fs_uniquifier-to-user-model.py +6 -6
- udata/migrations/2022-10-10-migrate-harvest-extras.py +36 -26
- udata/migrations/2023-02-08-rename-internal-dates.py +46 -28
- udata/migrations/2024-01-29-fix-reuse-and-dataset-with-private-None.py +10 -8
- udata/migrations/2024-03-22-migrate-activity-kwargs-to-extras.py +6 -4
- udata/migrations/2024-06-11-fix-reuse-datasets-references.py +7 -6
- udata/migrations/__init__.py +123 -105
- udata/models/__init__.py +4 -4
- udata/mongo/__init__.py +13 -11
- udata/mongo/badges_field.py +3 -2
- udata/mongo/datetime_fields.py +13 -12
- udata/mongo/document.py +17 -16
- udata/mongo/engine.py +15 -16
- udata/mongo/errors.py +2 -1
- udata/mongo/extras_fields.py +30 -20
- udata/mongo/queryset.py +12 -12
- udata/mongo/slug_fields.py +38 -28
- udata/mongo/taglist_field.py +1 -2
- udata/mongo/url_field.py +5 -5
- udata/mongo/uuid_fields.py +4 -3
- udata/notifications/__init__.py +1 -1
- udata/notifications/mattermost.py +10 -9
- udata/rdf.py +167 -188
- udata/routing.py +40 -45
- udata/search/__init__.py +18 -19
- udata/search/adapter.py +17 -16
- udata/search/commands.py +44 -51
- udata/search/fields.py +13 -20
- udata/search/query.py +23 -18
- udata/search/result.py +9 -10
- udata/sentry.py +21 -19
- udata/settings.py +262 -198
- udata/sitemap.py +8 -6
- udata/storage/s3.py +20 -13
- udata/tags.py +4 -5
- udata/tasks.py +43 -42
- udata/tests/__init__.py +9 -6
- udata/tests/api/__init__.py +8 -6
- udata/tests/api/test_auth_api.py +395 -321
- udata/tests/api/test_base_api.py +33 -35
- udata/tests/api/test_contact_points.py +7 -9
- udata/tests/api/test_dataservices_api.py +211 -158
- udata/tests/api/test_datasets_api.py +823 -812
- udata/tests/api/test_follow_api.py +13 -15
- udata/tests/api/test_me_api.py +95 -112
- udata/tests/api/test_organizations_api.py +301 -339
- udata/tests/api/test_reports_api.py +35 -25
- udata/tests/api/test_reuses_api.py +134 -139
- udata/tests/api/test_swagger.py +5 -5
- udata/tests/api/test_tags_api.py +18 -25
- udata/tests/api/test_topics_api.py +94 -94
- udata/tests/api/test_transfer_api.py +53 -48
- udata/tests/api/test_user_api.py +128 -141
- udata/tests/apiv2/test_datasets.py +290 -198
- udata/tests/apiv2/test_me_api.py +10 -11
- udata/tests/apiv2/test_organizations.py +56 -74
- udata/tests/apiv2/test_swagger.py +5 -5
- udata/tests/apiv2/test_topics.py +69 -87
- udata/tests/cli/test_cli_base.py +8 -8
- udata/tests/cli/test_db_cli.py +21 -19
- udata/tests/dataservice/test_dataservice_tasks.py +8 -12
- udata/tests/dataset/test_csv_adapter.py +44 -35
- udata/tests/dataset/test_dataset_actions.py +2 -3
- udata/tests/dataset/test_dataset_commands.py +7 -8
- udata/tests/dataset/test_dataset_events.py +36 -29
- udata/tests/dataset/test_dataset_model.py +224 -217
- udata/tests/dataset/test_dataset_rdf.py +142 -131
- udata/tests/dataset/test_dataset_tasks.py +15 -15
- udata/tests/dataset/test_resource_preview.py +10 -13
- udata/tests/features/territories/__init__.py +9 -13
- udata/tests/features/territories/test_territories_api.py +71 -91
- udata/tests/forms/test_basic_fields.py +7 -7
- udata/tests/forms/test_current_user_field.py +39 -66
- udata/tests/forms/test_daterange_field.py +31 -39
- udata/tests/forms/test_dict_field.py +28 -26
- udata/tests/forms/test_extras_fields.py +102 -76
- udata/tests/forms/test_form_field.py +8 -8
- udata/tests/forms/test_image_field.py +33 -26
- udata/tests/forms/test_model_field.py +134 -123
- udata/tests/forms/test_model_list_field.py +7 -7
- udata/tests/forms/test_nested_model_list_field.py +117 -79
- udata/tests/forms/test_publish_as_field.py +36 -65
- udata/tests/forms/test_reference_field.py +34 -53
- udata/tests/forms/test_user_forms.py +23 -21
- udata/tests/forms/test_uuid_field.py +6 -10
- udata/tests/frontend/__init__.py +9 -6
- udata/tests/frontend/test_auth.py +7 -6
- udata/tests/frontend/test_csv.py +81 -96
- udata/tests/frontend/test_hooks.py +43 -43
- udata/tests/frontend/test_markdown.py +211 -191
- udata/tests/helpers.py +32 -37
- udata/tests/models.py +2 -2
- udata/tests/organization/test_csv_adapter.py +21 -16
- udata/tests/organization/test_notifications.py +11 -18
- udata/tests/organization/test_organization_model.py +13 -13
- udata/tests/organization/test_organization_rdf.py +29 -22
- udata/tests/organization/test_organization_tasks.py +16 -17
- udata/tests/plugin.py +79 -73
- udata/tests/reuse/test_reuse_model.py +21 -21
- udata/tests/reuse/test_reuse_task.py +11 -13
- udata/tests/search/__init__.py +11 -12
- udata/tests/search/test_adapter.py +60 -70
- udata/tests/search/test_query.py +16 -16
- udata/tests/search/test_results.py +10 -7
- udata/tests/site/test_site_api.py +11 -16
- udata/tests/site/test_site_metrics.py +20 -30
- udata/tests/site/test_site_model.py +4 -5
- udata/tests/site/test_site_rdf.py +94 -78
- udata/tests/test_activity.py +17 -17
- udata/tests/test_cors.py +62 -0
- udata/tests/test_discussions.py +292 -299
- udata/tests/test_i18n.py +37 -40
- udata/tests/test_linkchecker.py +91 -85
- udata/tests/test_mail.py +13 -17
- udata/tests/test_migrations.py +219 -180
- udata/tests/test_model.py +164 -157
- udata/tests/test_notifications.py +17 -17
- udata/tests/test_owned.py +14 -14
- udata/tests/test_rdf.py +25 -23
- udata/tests/test_routing.py +89 -93
- udata/tests/test_storages.py +137 -128
- udata/tests/test_tags.py +44 -46
- udata/tests/test_topics.py +7 -7
- udata/tests/test_transfer.py +42 -49
- udata/tests/test_uris.py +160 -161
- udata/tests/test_utils.py +79 -71
- udata/tests/user/test_user_rdf.py +5 -9
- udata/tests/workers/test_jobs_commands.py +57 -58
- udata/tests/workers/test_tasks_routing.py +23 -29
- udata/tests/workers/test_workers_api.py +125 -131
- udata/tests/workers/test_workers_helpers.py +6 -6
- udata/tracking.py +4 -6
- udata/uris.py +45 -46
- udata/utils.py +68 -66
- udata/wsgi.py +1 -1
- {udata-9.1.2.dev30355.dist-info → udata-9.1.2.dev30454.dist-info}/METADATA +7 -3
- udata-9.1.2.dev30454.dist-info/RECORD +706 -0
- udata-9.1.2.dev30355.dist-info/RECORD +0 -704
- {udata-9.1.2.dev30355.dist-info → udata-9.1.2.dev30454.dist-info}/LICENSE +0 -0
- {udata-9.1.2.dev30355.dist-info → udata-9.1.2.dev30454.dist-info}/WHEEL +0 -0
- {udata-9.1.2.dev30355.dist-info → udata-9.1.2.dev30454.dist-info}/entry_points.txt +0 -0
- {udata-9.1.2.dev30355.dist-info → udata-9.1.2.dev30454.dist-info}/top_level.txt +0 -0
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
from udata.entrypoints import
|
|
1
|
+
from udata.entrypoints import EntrypointError, get_enabled
|
|
2
2
|
|
|
3
3
|
|
|
4
4
|
def get(app, name):
|
|
5
|
-
|
|
5
|
+
"""Get a backend given its name"""
|
|
6
6
|
backend = get_all(app).get(name)
|
|
7
7
|
if not backend:
|
|
8
8
|
msg = 'Harvest backend "{0}" is not registered'.format(name)
|
|
@@ -11,7 +11,7 @@ def get(app, name):
|
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
def get_all(app):
|
|
14
|
-
return get_enabled(
|
|
14
|
+
return get_enabled("udata.harvesters", app)
|
|
15
15
|
|
|
16
16
|
|
|
17
|
-
from .base import BaseBackend,
|
|
17
|
+
from .base import BaseBackend, HarvestFeature, HarvestFilter # flake8: noqa
|
udata/harvest/backends/base.py
CHANGED
|
@@ -1,23 +1,27 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import traceback
|
|
3
|
-
|
|
4
|
-
from datetime import datetime, date, timedelta
|
|
3
|
+
from datetime import date, datetime, timedelta
|
|
5
4
|
from uuid import UUID
|
|
6
5
|
|
|
7
6
|
import requests
|
|
8
|
-
|
|
9
7
|
from flask import current_app
|
|
10
|
-
from udata.core.dataservices.models import Dataservice
|
|
11
8
|
from voluptuous import MultipleInvalid, RequiredFieldInvalid
|
|
12
9
|
|
|
13
|
-
from udata.core.
|
|
10
|
+
from udata.core.dataservices.models import Dataservice
|
|
14
11
|
from udata.core.dataservices.models import HarvestMetadata as HarvestDataserviceMetadata
|
|
12
|
+
from udata.core.dataset.models import HarvestDatasetMetadata
|
|
15
13
|
from udata.models import Dataset
|
|
16
14
|
from udata.utils import safe_unicode
|
|
17
15
|
|
|
18
16
|
from ..exceptions import HarvestException, HarvestSkipException, HarvestValidationError
|
|
19
|
-
from ..models import
|
|
20
|
-
|
|
17
|
+
from ..models import (
|
|
18
|
+
HarvestError,
|
|
19
|
+
HarvestItem,
|
|
20
|
+
HarvestJob,
|
|
21
|
+
HarvestLog,
|
|
22
|
+
archive_harvested_dataset,
|
|
23
|
+
)
|
|
24
|
+
from ..signals import after_harvest_job, before_harvest_job
|
|
21
25
|
|
|
22
26
|
log = logging.getLogger(__name__)
|
|
23
27
|
|
|
@@ -27,18 +31,18 @@ requests.packages.urllib3.disable_warnings()
|
|
|
27
31
|
|
|
28
32
|
class HarvestFilter(object):
|
|
29
33
|
TYPES = {
|
|
30
|
-
str:
|
|
31
|
-
bytes:
|
|
32
|
-
int:
|
|
33
|
-
bool:
|
|
34
|
-
UUID:
|
|
35
|
-
datetime:
|
|
36
|
-
date:
|
|
34
|
+
str: "string",
|
|
35
|
+
bytes: "string",
|
|
36
|
+
int: "integer",
|
|
37
|
+
bool: "boolean",
|
|
38
|
+
UUID: "uuid",
|
|
39
|
+
datetime: "date-time",
|
|
40
|
+
date: "date",
|
|
37
41
|
}
|
|
38
42
|
|
|
39
43
|
def __init__(self, label, key, type, description=None):
|
|
40
44
|
if type not in self.TYPES:
|
|
41
|
-
raise TypeError(
|
|
45
|
+
raise TypeError("Unsupported type {0}".format(type))
|
|
42
46
|
self.label = label
|
|
43
47
|
self.key = key
|
|
44
48
|
self.type = type
|
|
@@ -46,10 +50,10 @@ class HarvestFilter(object):
|
|
|
46
50
|
|
|
47
51
|
def as_dict(self):
|
|
48
52
|
return {
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
+
"label": self.label,
|
|
54
|
+
"key": self.key,
|
|
55
|
+
"type": self.TYPES[self.type],
|
|
56
|
+
"description": self.description,
|
|
53
57
|
}
|
|
54
58
|
|
|
55
59
|
|
|
@@ -62,10 +66,10 @@ class HarvestFeature(object):
|
|
|
62
66
|
|
|
63
67
|
def as_dict(self):
|
|
64
68
|
return {
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
+
"key": self.key,
|
|
70
|
+
"label": self.label,
|
|
71
|
+
"description": self.description,
|
|
72
|
+
"default": self.default,
|
|
69
73
|
}
|
|
70
74
|
|
|
71
75
|
|
|
@@ -95,7 +99,7 @@ class BaseBackend(object):
|
|
|
95
99
|
self.source = source_or_job
|
|
96
100
|
self.job = None
|
|
97
101
|
self.dryrun = dryrun
|
|
98
|
-
self.max_items = max_items or current_app.config[
|
|
102
|
+
self.max_items = max_items or current_app.config["HARVEST_MAX_ITEMS"]
|
|
99
103
|
|
|
100
104
|
@property
|
|
101
105
|
def config(self):
|
|
@@ -103,38 +107,38 @@ class BaseBackend(object):
|
|
|
103
107
|
|
|
104
108
|
def head(self, url, headers={}, **kwargs):
|
|
105
109
|
headers.update(self.get_headers())
|
|
106
|
-
kwargs[
|
|
110
|
+
kwargs["verify"] = kwargs.get("verify", self.verify_ssl)
|
|
107
111
|
return requests.head(url, headers=headers, **kwargs)
|
|
108
112
|
|
|
109
113
|
def get(self, url, headers={}, **kwargs):
|
|
110
114
|
headers.update(self.get_headers())
|
|
111
|
-
kwargs[
|
|
115
|
+
kwargs["verify"] = kwargs.get("verify", self.verify_ssl)
|
|
112
116
|
return requests.get(url, headers=headers, **kwargs)
|
|
113
117
|
|
|
114
118
|
def post(self, url, data, headers={}, **kwargs):
|
|
115
119
|
headers.update(self.get_headers())
|
|
116
|
-
kwargs[
|
|
120
|
+
kwargs["verify"] = kwargs.get("verify", self.verify_ssl)
|
|
117
121
|
return requests.post(url, data=data, headers=headers, **kwargs)
|
|
118
122
|
|
|
119
123
|
def get_headers(self):
|
|
120
124
|
return {
|
|
121
125
|
# TODO: extract site title and version
|
|
122
|
-
|
|
126
|
+
"User-Agent": "uData/0.1 {0.name}".format(self),
|
|
123
127
|
}
|
|
124
128
|
|
|
125
129
|
def has_feature(self, key):
|
|
126
130
|
try:
|
|
127
131
|
feature = next(f for f in self.features if f.key == key)
|
|
128
132
|
except StopIteration:
|
|
129
|
-
raise HarvestException(
|
|
130
|
-
return self.config.get(
|
|
133
|
+
raise HarvestException("Unknown feature {}".format(key))
|
|
134
|
+
return self.config.get("features", {}).get(key, feature.default)
|
|
131
135
|
|
|
132
136
|
def get_filters(self):
|
|
133
|
-
return self.config.get(
|
|
137
|
+
return self.config.get("filters", [])
|
|
134
138
|
|
|
135
139
|
def inner_harvest(self):
|
|
136
140
|
raise NotImplementedError
|
|
137
|
-
|
|
141
|
+
|
|
138
142
|
def inner_process_dataset(self, item: HarvestItem) -> Dataset:
|
|
139
143
|
raise NotImplementedError
|
|
140
144
|
|
|
@@ -142,11 +146,9 @@ class BaseBackend(object):
|
|
|
142
146
|
raise NotImplementedError
|
|
143
147
|
|
|
144
148
|
def harvest(self):
|
|
145
|
-
log.debug(f
|
|
149
|
+
log.debug(f"Starting harvesting {self.source.name} ({self.source.url})…")
|
|
146
150
|
factory = HarvestJob if self.dryrun else HarvestJob.objects.create
|
|
147
|
-
self.job = factory(status=
|
|
148
|
-
started=datetime.utcnow(),
|
|
149
|
-
source=self.source)
|
|
151
|
+
self.job = factory(status="initialized", started=datetime.utcnow(), source=self.source)
|
|
150
152
|
|
|
151
153
|
before_harvest_job.send(self)
|
|
152
154
|
|
|
@@ -156,34 +158,38 @@ class BaseBackend(object):
|
|
|
156
158
|
if self.source.autoarchive:
|
|
157
159
|
self.autoarchive()
|
|
158
160
|
|
|
159
|
-
self.job.status =
|
|
161
|
+
self.job.status = "done"
|
|
160
162
|
|
|
161
|
-
if any(i.status ==
|
|
162
|
-
self.job.status +=
|
|
163
|
+
if any(i.status == "failed" for i in self.job.items):
|
|
164
|
+
self.job.status += "-errors"
|
|
163
165
|
except HarvestValidationError as e:
|
|
164
|
-
log.exception(
|
|
166
|
+
log.exception(
|
|
167
|
+
f'Harvesting validation failed for "{safe_unicode(self.source.name)}" ({self.source.backend})'
|
|
168
|
+
)
|
|
165
169
|
|
|
166
|
-
self.job.status =
|
|
170
|
+
self.job.status = "failed"
|
|
167
171
|
|
|
168
172
|
error = HarvestError(message=safe_unicode(e))
|
|
169
173
|
self.job.errors.append(error)
|
|
170
174
|
except Exception as e:
|
|
171
|
-
log.exception(
|
|
175
|
+
log.exception(
|
|
176
|
+
f'Harvesting failed for "{safe_unicode(self.source.name)}" ({self.source.backend})'
|
|
177
|
+
)
|
|
172
178
|
|
|
173
|
-
self.job.status =
|
|
179
|
+
self.job.status = "failed"
|
|
174
180
|
|
|
175
181
|
error = HarvestError(message=safe_unicode(e), details=traceback.format_exc())
|
|
176
182
|
self.job.errors.append(error)
|
|
177
183
|
finally:
|
|
178
184
|
self.end_job()
|
|
179
|
-
|
|
185
|
+
|
|
180
186
|
return self.job
|
|
181
187
|
|
|
182
188
|
def process_dataset(self, remote_id: str, **kwargs):
|
|
183
|
-
log.debug(f
|
|
189
|
+
log.debug(f"Processing dataset {remote_id}…")
|
|
184
190
|
|
|
185
191
|
# TODO add `type` to `HarvestItem` to differentiate `Dataset` from `Dataservice`
|
|
186
|
-
item = HarvestItem(status=
|
|
192
|
+
item = HarvestItem(status="started", started=datetime.utcnow(), remote_id=remote_id)
|
|
187
193
|
self.job.items.append(item)
|
|
188
194
|
self.save_job()
|
|
189
195
|
|
|
@@ -207,42 +213,45 @@ class BaseBackend(object):
|
|
|
207
213
|
else:
|
|
208
214
|
dataset.save()
|
|
209
215
|
item.dataset = dataset
|
|
210
|
-
item.status =
|
|
216
|
+
item.status = "done"
|
|
211
217
|
except HarvestSkipException as e:
|
|
212
|
-
item.status =
|
|
218
|
+
item.status = "skipped"
|
|
213
219
|
|
|
214
|
-
log.info(f
|
|
220
|
+
log.info(f"Skipped item {item.remote_id} : {safe_unicode(e)}")
|
|
215
221
|
item.errors.append(HarvestError(message=safe_unicode(e)))
|
|
216
222
|
except HarvestValidationError as e:
|
|
217
|
-
item.status =
|
|
223
|
+
item.status = "failed"
|
|
218
224
|
|
|
219
|
-
log.info(f
|
|
225
|
+
log.info(f"Error validating item {item.remote_id} : {safe_unicode(e)}")
|
|
220
226
|
item.errors.append(HarvestError(message=safe_unicode(e)))
|
|
221
227
|
except Exception as e:
|
|
222
|
-
item.status =
|
|
223
|
-
log.exception(f
|
|
228
|
+
item.status = "failed"
|
|
229
|
+
log.exception(f"Error while processing {item.remote_id} : {safe_unicode(e)}")
|
|
224
230
|
|
|
225
231
|
error = HarvestError(message=safe_unicode(e), details=traceback.format_exc())
|
|
226
232
|
item.errors.append(error)
|
|
227
233
|
finally:
|
|
228
234
|
current_app.logger.removeHandler(log_catcher)
|
|
229
235
|
item.ended = datetime.utcnow()
|
|
230
|
-
item.logs = [
|
|
236
|
+
item.logs = [
|
|
237
|
+
HarvestLog(level=record.levelname, message=record.getMessage())
|
|
238
|
+
for record in log_catcher.records
|
|
239
|
+
]
|
|
231
240
|
self.save_job()
|
|
232
241
|
|
|
233
242
|
def is_done(self) -> bool:
|
|
234
|
-
|
|
243
|
+
"""Should be called after process_dataset to know if we reach the max items"""
|
|
235
244
|
return self.max_items and len(self.job.items) >= self.max_items
|
|
236
245
|
|
|
237
|
-
def process_dataservice(self, remote_id: str, **kwargs) -> bool
|
|
238
|
-
|
|
246
|
+
def process_dataservice(self, remote_id: str, **kwargs) -> bool:
|
|
247
|
+
"""
|
|
239
248
|
Return `True` if the parent should stop iterating because we exceed the number
|
|
240
249
|
of items to process.
|
|
241
|
-
|
|
242
|
-
log.debug(f
|
|
250
|
+
"""
|
|
251
|
+
log.debug(f"Processing dataservice {remote_id}…")
|
|
243
252
|
|
|
244
253
|
# TODO add `type` to `HarvestItem` to differentiate `Dataset` from `Dataservice`
|
|
245
|
-
item = HarvestItem(status=
|
|
254
|
+
item = HarvestItem(status="started", started=datetime.utcnow(), remote_id=remote_id)
|
|
246
255
|
self.job.items.append(item)
|
|
247
256
|
self.save_job()
|
|
248
257
|
|
|
@@ -252,7 +261,9 @@ class BaseBackend(object):
|
|
|
252
261
|
|
|
253
262
|
dataservice = self.inner_process_dataservice(item, **kwargs)
|
|
254
263
|
|
|
255
|
-
dataservice.harvest = self.update_dataservice_harvest_info(
|
|
264
|
+
dataservice.harvest = self.update_dataservice_harvest_info(
|
|
265
|
+
dataservice.harvest, remote_id
|
|
266
|
+
)
|
|
256
267
|
dataservice.archived_at = None
|
|
257
268
|
|
|
258
269
|
# TODO: Apply editable mappings
|
|
@@ -262,20 +273,20 @@ class BaseBackend(object):
|
|
|
262
273
|
else:
|
|
263
274
|
dataservice.save()
|
|
264
275
|
item.dataservice = dataservice
|
|
265
|
-
item.status =
|
|
276
|
+
item.status = "done"
|
|
266
277
|
except HarvestSkipException as e:
|
|
267
|
-
item.status =
|
|
278
|
+
item.status = "skipped"
|
|
268
279
|
|
|
269
|
-
log.info(f
|
|
280
|
+
log.info(f"Skipped item {item.remote_id} : {safe_unicode(e)}")
|
|
270
281
|
item.errors.append(HarvestError(message=safe_unicode(e)))
|
|
271
282
|
except HarvestValidationError as e:
|
|
272
|
-
item.status =
|
|
283
|
+
item.status = "failed"
|
|
273
284
|
|
|
274
|
-
log.info(f
|
|
285
|
+
log.info(f"Error validating item {item.remote_id} : {safe_unicode(e)}")
|
|
275
286
|
item.errors.append(HarvestError(message=safe_unicode(e)))
|
|
276
287
|
except Exception as e:
|
|
277
|
-
item.status =
|
|
278
|
-
log.exception(f
|
|
288
|
+
item.status = "failed"
|
|
289
|
+
log.exception(f"Error while processing {item.remote_id} : {safe_unicode(e)}")
|
|
279
290
|
|
|
280
291
|
error = HarvestError(message=safe_unicode(e), details=traceback.format_exc())
|
|
281
292
|
item.errors.append(error)
|
|
@@ -299,7 +310,9 @@ class BaseBackend(object):
|
|
|
299
310
|
|
|
300
311
|
return harvest
|
|
301
312
|
|
|
302
|
-
def update_dataservice_harvest_info(
|
|
313
|
+
def update_dataservice_harvest_info(
|
|
314
|
+
self, harvest: HarvestDataserviceMetadata | None, remote_id: int
|
|
315
|
+
):
|
|
303
316
|
if not harvest:
|
|
304
317
|
harvest = HarvestDataserviceMetadata()
|
|
305
318
|
|
|
@@ -328,45 +341,47 @@ class BaseBackend(object):
|
|
|
328
341
|
after_harvest_job.send(self)
|
|
329
342
|
|
|
330
343
|
def autoarchive(self):
|
|
331
|
-
|
|
344
|
+
"""
|
|
332
345
|
Archive items that exist on the local instance but not on remote platform
|
|
333
346
|
after a grace period of HARVEST_AUTOARCHIVE_GRACE_DAYS days.
|
|
334
|
-
|
|
335
|
-
log.debug(
|
|
336
|
-
limit_days = current_app.config[
|
|
347
|
+
"""
|
|
348
|
+
log.debug("Running autoarchive")
|
|
349
|
+
limit_days = current_app.config["HARVEST_AUTOARCHIVE_GRACE_DAYS"]
|
|
337
350
|
limit_date = date.today() - timedelta(days=limit_days)
|
|
338
|
-
remote_ids = [i.remote_id for i in self.job.items if i.status !=
|
|
351
|
+
remote_ids = [i.remote_id for i in self.job.items if i.status != "archived"]
|
|
339
352
|
q = {
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
353
|
+
"harvest__source_id": str(self.source.id),
|
|
354
|
+
"harvest__remote_id__nin": remote_ids,
|
|
355
|
+
"harvest__last_update__lt": limit_date,
|
|
343
356
|
}
|
|
344
357
|
local_items_not_on_remote = Dataset.objects.filter(**q)
|
|
345
358
|
|
|
346
359
|
for dataset in local_items_not_on_remote:
|
|
347
360
|
if not dataset.harvest.archived_at:
|
|
348
|
-
archive_harvested_dataset(dataset, reason=
|
|
361
|
+
archive_harvested_dataset(dataset, reason="not-on-remote", dryrun=self.dryrun)
|
|
349
362
|
# add a HarvestItem to the job list (useful for report)
|
|
350
363
|
# even when archiving has already been done (useful for debug)
|
|
351
|
-
self.job.items.append(
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
)
|
|
364
|
+
self.job.items.append(
|
|
365
|
+
HarvestItem(
|
|
366
|
+
remote_id=str(dataset.harvest.remote_id), dataset=dataset, status="archived"
|
|
367
|
+
)
|
|
368
|
+
)
|
|
356
369
|
|
|
357
370
|
self.save_job()
|
|
358
371
|
|
|
359
372
|
def get_dataset(self, remote_id):
|
|
360
|
-
|
|
373
|
+
"""Get or create a dataset given its remote ID (and its source)
|
|
361
374
|
We first try to match `source_id` to be source domain independent
|
|
362
|
-
|
|
363
|
-
dataset = Dataset.objects(
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
375
|
+
"""
|
|
376
|
+
dataset = Dataset.objects(
|
|
377
|
+
__raw__={
|
|
378
|
+
"harvest.remote_id": remote_id,
|
|
379
|
+
"$or": [
|
|
380
|
+
{"harvest.domain": self.source.domain},
|
|
381
|
+
{"harvest.source_id": str(self.source.id)},
|
|
382
|
+
],
|
|
383
|
+
}
|
|
384
|
+
).first()
|
|
370
385
|
|
|
371
386
|
if dataset:
|
|
372
387
|
return dataset
|
|
@@ -377,18 +392,20 @@ class BaseBackend(object):
|
|
|
377
392
|
return Dataset(owner=self.source.owner)
|
|
378
393
|
|
|
379
394
|
return Dataset()
|
|
380
|
-
|
|
395
|
+
|
|
381
396
|
def get_dataservice(self, remote_id):
|
|
382
|
-
|
|
397
|
+
"""Get or create a dataservice given its remote ID (and its source)
|
|
383
398
|
We first try to match `source_id` to be source domain independent
|
|
384
|
-
|
|
385
|
-
dataservice = Dataservice.objects(
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
399
|
+
"""
|
|
400
|
+
dataservice = Dataservice.objects(
|
|
401
|
+
__raw__={
|
|
402
|
+
"harvest.remote_id": remote_id,
|
|
403
|
+
"$or": [
|
|
404
|
+
{"harvest.domain": self.source.domain},
|
|
405
|
+
{"harvest.source_id": str(self.source.id)},
|
|
406
|
+
],
|
|
407
|
+
}
|
|
408
|
+
).first()
|
|
392
409
|
|
|
393
410
|
if dataservice:
|
|
394
411
|
return dataservice
|
|
@@ -401,18 +418,18 @@ class BaseBackend(object):
|
|
|
401
418
|
return Dataservice()
|
|
402
419
|
|
|
403
420
|
def validate(self, data, schema):
|
|
404
|
-
|
|
421
|
+
"""Perform a data validation against a given schema.
|
|
405
422
|
|
|
406
423
|
:param data: an object to validate
|
|
407
424
|
:param schema: a Voluptous schema to validate against
|
|
408
|
-
|
|
425
|
+
"""
|
|
409
426
|
try:
|
|
410
427
|
return schema(data)
|
|
411
428
|
except MultipleInvalid as ie:
|
|
412
429
|
errors = []
|
|
413
430
|
for error in ie.errors:
|
|
414
431
|
if error.path:
|
|
415
|
-
field =
|
|
432
|
+
field = ".".join(str(p) for p in error.path)
|
|
416
433
|
path = error.path
|
|
417
434
|
value = data
|
|
418
435
|
while path:
|
|
@@ -424,21 +441,21 @@ class BaseBackend(object):
|
|
|
424
441
|
except Exception:
|
|
425
442
|
value = None
|
|
426
443
|
|
|
427
|
-
txt = safe_unicode(error).replace(
|
|
444
|
+
txt = safe_unicode(error).replace("for dictionary value", "")
|
|
428
445
|
txt = txt.strip()
|
|
429
446
|
if isinstance(error, RequiredFieldInvalid):
|
|
430
|
-
msg =
|
|
447
|
+
msg = "[{0}] {1}"
|
|
431
448
|
else:
|
|
432
|
-
msg =
|
|
449
|
+
msg = "[{0}] {1}: {2}"
|
|
433
450
|
try:
|
|
434
451
|
msg = msg.format(field, txt, str(value))
|
|
435
452
|
except Exception:
|
|
436
|
-
msg =
|
|
453
|
+
msg = "[{0}] {1}".format(field, txt)
|
|
437
454
|
|
|
438
455
|
else:
|
|
439
456
|
msg = str(error)
|
|
440
457
|
errors.append(msg)
|
|
441
|
-
msg =
|
|
458
|
+
msg = "\n- ".join(["Validation error:"] + errors)
|
|
442
459
|
raise HarvestValidationError(msg)
|
|
443
460
|
|
|
444
461
|
|
|
@@ -450,4 +467,4 @@ class LogCatcher(logging.Handler):
|
|
|
450
467
|
super().__init__()
|
|
451
468
|
|
|
452
469
|
def emit(self, record):
|
|
453
|
-
self.records.append(record)
|
|
470
|
+
self.records.append(record)
|