udata 9.1.2.dev30355__py2.py3-none-any.whl → 9.1.2.dev30382__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of udata might be problematic. Click here for more details.
- tasks/__init__.py +109 -107
- tasks/helpers.py +18 -18
- udata/__init__.py +4 -4
- udata/admin/views.py +5 -5
- udata/api/__init__.py +135 -124
- udata/api/commands.py +45 -37
- udata/api/errors.py +5 -4
- udata/api/fields.py +23 -21
- udata/api/oauth2.py +55 -74
- udata/api/parsers.py +15 -15
- udata/api/signals.py +1 -1
- udata/api_fields.py +137 -89
- udata/app.py +56 -54
- udata/assets.py +5 -5
- udata/auth/__init__.py +37 -26
- udata/auth/forms.py +23 -15
- udata/auth/helpers.py +1 -1
- udata/auth/mails.py +3 -3
- udata/auth/password_validation.py +19 -15
- udata/auth/views.py +94 -68
- udata/commands/__init__.py +71 -69
- udata/commands/cache.py +7 -7
- udata/commands/db.py +201 -140
- udata/commands/dcat.py +36 -30
- udata/commands/fixtures.py +100 -84
- udata/commands/images.py +21 -20
- udata/commands/info.py +17 -20
- udata/commands/init.py +10 -10
- udata/commands/purge.py +12 -13
- udata/commands/serve.py +41 -29
- udata/commands/static.py +16 -18
- udata/commands/test.py +20 -20
- udata/commands/tests/fixtures.py +26 -24
- udata/commands/worker.py +31 -33
- udata/core/__init__.py +12 -12
- udata/core/activity/__init__.py +0 -1
- udata/core/activity/api.py +59 -49
- udata/core/activity/models.py +28 -26
- udata/core/activity/signals.py +1 -1
- udata/core/activity/tasks.py +16 -10
- udata/core/badges/api.py +6 -6
- udata/core/badges/commands.py +14 -13
- udata/core/badges/fields.py +8 -5
- udata/core/badges/forms.py +7 -4
- udata/core/badges/models.py +16 -31
- udata/core/badges/permissions.py +1 -3
- udata/core/badges/signals.py +2 -2
- udata/core/badges/tasks.py +3 -2
- udata/core/badges/tests/test_commands.py +10 -10
- udata/core/badges/tests/test_model.py +24 -31
- udata/core/contact_point/api.py +19 -18
- udata/core/contact_point/api_fields.py +21 -14
- udata/core/contact_point/factories.py +2 -2
- udata/core/contact_point/forms.py +7 -6
- udata/core/contact_point/models.py +3 -5
- udata/core/dataservices/api.py +26 -21
- udata/core/dataservices/factories.py +13 -11
- udata/core/dataservices/models.py +35 -40
- udata/core/dataservices/permissions.py +4 -4
- udata/core/dataservices/rdf.py +40 -17
- udata/core/dataservices/tasks.py +4 -3
- udata/core/dataset/actions.py +10 -10
- udata/core/dataset/activities.py +21 -23
- udata/core/dataset/api.py +321 -298
- udata/core/dataset/api_fields.py +443 -271
- udata/core/dataset/apiv2.py +305 -229
- udata/core/dataset/commands.py +38 -36
- udata/core/dataset/constants.py +61 -54
- udata/core/dataset/csv.py +70 -74
- udata/core/dataset/events.py +39 -32
- udata/core/dataset/exceptions.py +8 -4
- udata/core/dataset/factories.py +57 -65
- udata/core/dataset/forms.py +87 -63
- udata/core/dataset/models.py +336 -280
- udata/core/dataset/permissions.py +9 -6
- udata/core/dataset/preview.py +15 -17
- udata/core/dataset/rdf.py +156 -122
- udata/core/dataset/search.py +92 -77
- udata/core/dataset/signals.py +1 -1
- udata/core/dataset/tasks.py +63 -54
- udata/core/discussions/actions.py +5 -5
- udata/core/discussions/api.py +124 -120
- udata/core/discussions/factories.py +2 -2
- udata/core/discussions/forms.py +9 -7
- udata/core/discussions/metrics.py +1 -3
- udata/core/discussions/models.py +25 -24
- udata/core/discussions/notifications.py +18 -14
- udata/core/discussions/permissions.py +3 -3
- udata/core/discussions/signals.py +4 -4
- udata/core/discussions/tasks.py +24 -28
- udata/core/followers/api.py +32 -33
- udata/core/followers/models.py +9 -9
- udata/core/followers/signals.py +3 -3
- udata/core/jobs/actions.py +7 -7
- udata/core/jobs/api.py +99 -92
- udata/core/jobs/commands.py +48 -49
- udata/core/jobs/forms.py +11 -11
- udata/core/jobs/models.py +6 -6
- udata/core/metrics/__init__.py +2 -2
- udata/core/metrics/commands.py +34 -30
- udata/core/metrics/models.py +2 -4
- udata/core/metrics/signals.py +1 -1
- udata/core/metrics/tasks.py +3 -3
- udata/core/organization/activities.py +12 -15
- udata/core/organization/api.py +167 -174
- udata/core/organization/api_fields.py +183 -124
- udata/core/organization/apiv2.py +32 -32
- udata/core/organization/commands.py +20 -22
- udata/core/organization/constants.py +11 -11
- udata/core/organization/csv.py +17 -15
- udata/core/organization/factories.py +8 -11
- udata/core/organization/forms.py +32 -26
- udata/core/organization/metrics.py +2 -1
- udata/core/organization/models.py +87 -67
- udata/core/organization/notifications.py +18 -14
- udata/core/organization/permissions.py +10 -11
- udata/core/organization/rdf.py +14 -14
- udata/core/organization/search.py +30 -28
- udata/core/organization/signals.py +7 -7
- udata/core/organization/tasks.py +42 -61
- udata/core/owned.py +38 -27
- udata/core/post/api.py +82 -81
- udata/core/post/constants.py +8 -5
- udata/core/post/factories.py +4 -4
- udata/core/post/forms.py +13 -14
- udata/core/post/models.py +20 -22
- udata/core/post/tests/test_api.py +30 -32
- udata/core/reports/api.py +8 -7
- udata/core/reports/constants.py +1 -3
- udata/core/reports/models.py +10 -10
- udata/core/reuse/activities.py +15 -19
- udata/core/reuse/api.py +123 -126
- udata/core/reuse/api_fields.py +120 -85
- udata/core/reuse/apiv2.py +11 -10
- udata/core/reuse/constants.py +23 -23
- udata/core/reuse/csv.py +18 -18
- udata/core/reuse/factories.py +5 -9
- udata/core/reuse/forms.py +24 -21
- udata/core/reuse/models.py +55 -51
- udata/core/reuse/permissions.py +2 -2
- udata/core/reuse/search.py +49 -46
- udata/core/reuse/signals.py +1 -1
- udata/core/reuse/tasks.py +4 -5
- udata/core/site/api.py +47 -50
- udata/core/site/factories.py +2 -2
- udata/core/site/forms.py +4 -5
- udata/core/site/models.py +94 -63
- udata/core/site/rdf.py +14 -14
- udata/core/spam/api.py +16 -9
- udata/core/spam/constants.py +4 -4
- udata/core/spam/fields.py +13 -7
- udata/core/spam/models.py +27 -20
- udata/core/spam/signals.py +1 -1
- udata/core/spam/tests/test_spam.py +6 -5
- udata/core/spatial/api.py +72 -80
- udata/core/spatial/api_fields.py +73 -58
- udata/core/spatial/commands.py +67 -64
- udata/core/spatial/constants.py +3 -3
- udata/core/spatial/factories.py +37 -54
- udata/core/spatial/forms.py +27 -26
- udata/core/spatial/geoids.py +17 -17
- udata/core/spatial/models.py +43 -47
- udata/core/spatial/tasks.py +2 -1
- udata/core/spatial/tests/test_api.py +115 -130
- udata/core/spatial/tests/test_fields.py +74 -77
- udata/core/spatial/tests/test_geoid.py +22 -22
- udata/core/spatial/tests/test_models.py +5 -7
- udata/core/spatial/translations.py +16 -16
- udata/core/storages/__init__.py +16 -18
- udata/core/storages/api.py +66 -64
- udata/core/storages/tasks.py +7 -7
- udata/core/storages/utils.py +15 -15
- udata/core/storages/views.py +5 -6
- udata/core/tags/api.py +17 -14
- udata/core/tags/csv.py +4 -4
- udata/core/tags/models.py +8 -5
- udata/core/tags/tasks.py +11 -13
- udata/core/tags/views.py +4 -4
- udata/core/topic/api.py +84 -73
- udata/core/topic/apiv2.py +157 -127
- udata/core/topic/factories.py +3 -4
- udata/core/topic/forms.py +12 -14
- udata/core/topic/models.py +14 -19
- udata/core/topic/parsers.py +26 -26
- udata/core/user/activities.py +30 -29
- udata/core/user/api.py +151 -152
- udata/core/user/api_fields.py +132 -100
- udata/core/user/apiv2.py +7 -7
- udata/core/user/commands.py +38 -38
- udata/core/user/factories.py +8 -9
- udata/core/user/forms.py +14 -11
- udata/core/user/metrics.py +2 -2
- udata/core/user/models.py +68 -69
- udata/core/user/permissions.py +4 -5
- udata/core/user/rdf.py +7 -8
- udata/core/user/tasks.py +2 -2
- udata/core/user/tests/test_user_model.py +24 -16
- udata/db/tasks.py +2 -1
- udata/entrypoints.py +35 -31
- udata/errors.py +2 -1
- udata/event/values.py +6 -6
- udata/factories.py +2 -2
- udata/features/identicon/api.py +5 -6
- udata/features/identicon/backends.py +48 -55
- udata/features/identicon/tests/test_backends.py +4 -5
- udata/features/notifications/__init__.py +0 -1
- udata/features/notifications/actions.py +9 -9
- udata/features/notifications/api.py +17 -13
- udata/features/territories/__init__.py +12 -10
- udata/features/territories/api.py +14 -15
- udata/features/territories/models.py +23 -28
- udata/features/transfer/actions.py +8 -11
- udata/features/transfer/api.py +84 -77
- udata/features/transfer/factories.py +2 -1
- udata/features/transfer/models.py +11 -12
- udata/features/transfer/notifications.py +19 -15
- udata/features/transfer/permissions.py +5 -5
- udata/forms/__init__.py +5 -2
- udata/forms/fields.py +164 -172
- udata/forms/validators.py +19 -22
- udata/forms/widgets.py +9 -13
- udata/frontend/__init__.py +31 -26
- udata/frontend/csv.py +68 -58
- udata/frontend/markdown.py +40 -44
- udata/harvest/actions.py +89 -77
- udata/harvest/api.py +294 -238
- udata/harvest/backends/__init__.py +4 -4
- udata/harvest/backends/base.py +128 -111
- udata/harvest/backends/dcat.py +80 -66
- udata/harvest/commands.py +56 -60
- udata/harvest/csv.py +8 -8
- udata/harvest/exceptions.py +6 -3
- udata/harvest/filters.py +24 -23
- udata/harvest/forms.py +27 -28
- udata/harvest/models.py +88 -80
- udata/harvest/notifications.py +15 -10
- udata/harvest/signals.py +13 -13
- udata/harvest/tasks.py +11 -10
- udata/harvest/tests/factories.py +23 -24
- udata/harvest/tests/test_actions.py +136 -166
- udata/harvest/tests/test_api.py +220 -214
- udata/harvest/tests/test_base_backend.py +117 -112
- udata/harvest/tests/test_dcat_backend.py +380 -308
- udata/harvest/tests/test_filters.py +33 -22
- udata/harvest/tests/test_models.py +11 -14
- udata/harvest/tests/test_notifications.py +6 -7
- udata/harvest/tests/test_tasks.py +7 -6
- udata/i18n.py +237 -78
- udata/linkchecker/backends.py +5 -11
- udata/linkchecker/checker.py +23 -22
- udata/linkchecker/commands.py +4 -6
- udata/linkchecker/models.py +6 -6
- udata/linkchecker/tasks.py +18 -20
- udata/mail.py +21 -21
- udata/migrations/2020-07-24-remove-s-from-scope-oauth.py +9 -8
- udata/migrations/2020-08-24-add-fs-filename.py +9 -8
- udata/migrations/2020-09-28-update-reuses-datasets-metrics.py +5 -4
- udata/migrations/2020-10-16-migrate-ods-resources.py +9 -10
- udata/migrations/2021-04-08-update-schema-with-new-structure.py +8 -7
- udata/migrations/2021-05-27-fix-default-schema-name.py +7 -6
- udata/migrations/2021-07-05-remove-unused-badges.py +17 -15
- udata/migrations/2021-07-07-update-schema-for-community-resources.py +7 -6
- udata/migrations/2021-08-17-follow-integrity.py +5 -4
- udata/migrations/2021-08-17-harvest-integrity.py +13 -12
- udata/migrations/2021-08-17-oauth2client-integrity.py +5 -4
- udata/migrations/2021-08-17-transfer-integrity.py +5 -4
- udata/migrations/2021-08-17-users-integrity.py +9 -8
- udata/migrations/2021-12-14-reuse-topics.py +7 -6
- udata/migrations/2022-04-21-improve-extension-detection.py +8 -7
- udata/migrations/2022-09-22-clean-inactive-harvest-datasets.py +16 -14
- udata/migrations/2022-10-10-add-fs_uniquifier-to-user-model.py +6 -6
- udata/migrations/2022-10-10-migrate-harvest-extras.py +36 -26
- udata/migrations/2023-02-08-rename-internal-dates.py +46 -28
- udata/migrations/2024-01-29-fix-reuse-and-dataset-with-private-None.py +10 -8
- udata/migrations/2024-03-22-migrate-activity-kwargs-to-extras.py +6 -4
- udata/migrations/2024-06-11-fix-reuse-datasets-references.py +7 -6
- udata/migrations/__init__.py +123 -105
- udata/models/__init__.py +4 -4
- udata/mongo/__init__.py +13 -11
- udata/mongo/badges_field.py +3 -2
- udata/mongo/datetime_fields.py +13 -12
- udata/mongo/document.py +17 -16
- udata/mongo/engine.py +15 -16
- udata/mongo/errors.py +2 -1
- udata/mongo/extras_fields.py +30 -20
- udata/mongo/queryset.py +12 -12
- udata/mongo/slug_fields.py +38 -28
- udata/mongo/taglist_field.py +1 -2
- udata/mongo/url_field.py +5 -5
- udata/mongo/uuid_fields.py +4 -3
- udata/notifications/__init__.py +1 -1
- udata/notifications/mattermost.py +10 -9
- udata/rdf.py +167 -188
- udata/routing.py +40 -45
- udata/search/__init__.py +18 -19
- udata/search/adapter.py +17 -16
- udata/search/commands.py +44 -51
- udata/search/fields.py +13 -20
- udata/search/query.py +23 -18
- udata/search/result.py +9 -10
- udata/sentry.py +21 -19
- udata/settings.py +262 -198
- udata/sitemap.py +8 -6
- udata/static/chunks/{11.e9b9ca1f3e03d4020377.js → 11.52e531c19f8de80c00cf.js} +3 -3
- udata/static/chunks/{11.e9b9ca1f3e03d4020377.js.map → 11.52e531c19f8de80c00cf.js.map} +1 -1
- udata/static/chunks/{13.038c0d9aa0dfa0181c4b.js → 13.c3343a7f1070061c0e10.js} +2 -2
- udata/static/chunks/{13.038c0d9aa0dfa0181c4b.js.map → 13.c3343a7f1070061c0e10.js.map} +1 -1
- udata/static/chunks/{16.0baa2b64a74a2dcde25c.js → 16.8fa42440ad75ca172e6d.js} +2 -2
- udata/static/chunks/{16.0baa2b64a74a2dcde25c.js.map → 16.8fa42440ad75ca172e6d.js.map} +1 -1
- udata/static/chunks/{19.350a9f150b074b4ecefa.js → 19.9c6c8412729cd6d59cfa.js} +3 -3
- udata/static/chunks/{19.350a9f150b074b4ecefa.js.map → 19.9c6c8412729cd6d59cfa.js.map} +1 -1
- udata/static/chunks/{5.6ebbce2b9b3e696d3da5.js → 5.71d15c2e4f21feee2a9a.js} +3 -3
- udata/static/chunks/{5.6ebbce2b9b3e696d3da5.js.map → 5.71d15c2e4f21feee2a9a.js.map} +1 -1
- udata/static/chunks/{6.d8a5f7b017bcbd083641.js → 6.9139dc098b8ea640b890.js} +3 -3
- udata/static/chunks/{6.d8a5f7b017bcbd083641.js.map → 6.9139dc098b8ea640b890.js.map} +1 -1
- udata/static/common.js +1 -1
- udata/static/common.js.map +1 -1
- udata/storage/s3.py +20 -13
- udata/tags.py +4 -5
- udata/tasks.py +43 -42
- udata/tests/__init__.py +9 -6
- udata/tests/api/__init__.py +5 -6
- udata/tests/api/test_auth_api.py +395 -321
- udata/tests/api/test_base_api.py +31 -33
- udata/tests/api/test_contact_points.py +7 -9
- udata/tests/api/test_dataservices_api.py +211 -158
- udata/tests/api/test_datasets_api.py +823 -812
- udata/tests/api/test_follow_api.py +13 -15
- udata/tests/api/test_me_api.py +95 -112
- udata/tests/api/test_organizations_api.py +301 -339
- udata/tests/api/test_reports_api.py +35 -25
- udata/tests/api/test_reuses_api.py +134 -139
- udata/tests/api/test_swagger.py +5 -5
- udata/tests/api/test_tags_api.py +18 -25
- udata/tests/api/test_topics_api.py +94 -94
- udata/tests/api/test_transfer_api.py +53 -48
- udata/tests/api/test_user_api.py +128 -141
- udata/tests/apiv2/test_datasets.py +290 -198
- udata/tests/apiv2/test_me_api.py +10 -11
- udata/tests/apiv2/test_organizations.py +56 -74
- udata/tests/apiv2/test_swagger.py +5 -5
- udata/tests/apiv2/test_topics.py +69 -87
- udata/tests/cli/test_cli_base.py +8 -8
- udata/tests/cli/test_db_cli.py +21 -19
- udata/tests/dataservice/test_dataservice_tasks.py +8 -12
- udata/tests/dataset/test_csv_adapter.py +44 -35
- udata/tests/dataset/test_dataset_actions.py +2 -3
- udata/tests/dataset/test_dataset_commands.py +7 -8
- udata/tests/dataset/test_dataset_events.py +36 -29
- udata/tests/dataset/test_dataset_model.py +224 -217
- udata/tests/dataset/test_dataset_rdf.py +142 -131
- udata/tests/dataset/test_dataset_tasks.py +15 -15
- udata/tests/dataset/test_resource_preview.py +10 -13
- udata/tests/features/territories/__init__.py +9 -13
- udata/tests/features/territories/test_territories_api.py +71 -91
- udata/tests/forms/test_basic_fields.py +7 -7
- udata/tests/forms/test_current_user_field.py +39 -66
- udata/tests/forms/test_daterange_field.py +31 -39
- udata/tests/forms/test_dict_field.py +28 -26
- udata/tests/forms/test_extras_fields.py +102 -76
- udata/tests/forms/test_form_field.py +8 -8
- udata/tests/forms/test_image_field.py +33 -26
- udata/tests/forms/test_model_field.py +134 -123
- udata/tests/forms/test_model_list_field.py +7 -7
- udata/tests/forms/test_nested_model_list_field.py +117 -79
- udata/tests/forms/test_publish_as_field.py +36 -65
- udata/tests/forms/test_reference_field.py +34 -53
- udata/tests/forms/test_user_forms.py +23 -21
- udata/tests/forms/test_uuid_field.py +6 -10
- udata/tests/frontend/__init__.py +9 -6
- udata/tests/frontend/test_auth.py +7 -6
- udata/tests/frontend/test_csv.py +81 -96
- udata/tests/frontend/test_hooks.py +43 -43
- udata/tests/frontend/test_markdown.py +211 -191
- udata/tests/helpers.py +32 -37
- udata/tests/models.py +2 -2
- udata/tests/organization/test_csv_adapter.py +21 -16
- udata/tests/organization/test_notifications.py +11 -18
- udata/tests/organization/test_organization_model.py +13 -13
- udata/tests/organization/test_organization_rdf.py +29 -22
- udata/tests/organization/test_organization_tasks.py +16 -17
- udata/tests/plugin.py +76 -73
- udata/tests/reuse/test_reuse_model.py +21 -21
- udata/tests/reuse/test_reuse_task.py +11 -13
- udata/tests/search/__init__.py +11 -12
- udata/tests/search/test_adapter.py +60 -70
- udata/tests/search/test_query.py +16 -16
- udata/tests/search/test_results.py +10 -7
- udata/tests/site/test_site_api.py +11 -16
- udata/tests/site/test_site_metrics.py +20 -30
- udata/tests/site/test_site_model.py +4 -5
- udata/tests/site/test_site_rdf.py +94 -78
- udata/tests/test_activity.py +17 -17
- udata/tests/test_discussions.py +292 -299
- udata/tests/test_i18n.py +37 -40
- udata/tests/test_linkchecker.py +91 -85
- udata/tests/test_mail.py +13 -17
- udata/tests/test_migrations.py +219 -180
- udata/tests/test_model.py +164 -157
- udata/tests/test_notifications.py +17 -17
- udata/tests/test_owned.py +14 -14
- udata/tests/test_rdf.py +25 -23
- udata/tests/test_routing.py +89 -93
- udata/tests/test_storages.py +137 -128
- udata/tests/test_tags.py +44 -46
- udata/tests/test_topics.py +7 -7
- udata/tests/test_transfer.py +42 -49
- udata/tests/test_uris.py +160 -161
- udata/tests/test_utils.py +79 -71
- udata/tests/user/test_user_rdf.py +5 -9
- udata/tests/workers/test_jobs_commands.py +57 -58
- udata/tests/workers/test_tasks_routing.py +23 -29
- udata/tests/workers/test_workers_api.py +125 -131
- udata/tests/workers/test_workers_helpers.py +6 -6
- udata/tracking.py +4 -6
- udata/uris.py +45 -46
- udata/utils.py +68 -66
- udata/wsgi.py +1 -1
- {udata-9.1.2.dev30355.dist-info → udata-9.1.2.dev30382.dist-info}/METADATA +3 -2
- udata-9.1.2.dev30382.dist-info/RECORD +704 -0
- udata-9.1.2.dev30355.dist-info/RECORD +0 -704
- {udata-9.1.2.dev30355.dist-info → udata-9.1.2.dev30382.dist-info}/LICENSE +0 -0
- {udata-9.1.2.dev30355.dist-info → udata-9.1.2.dev30382.dist-info}/WHEEL +0 -0
- {udata-9.1.2.dev30355.dist-info → udata-9.1.2.dev30382.dist-info}/entry_points.txt +0 -0
- {udata-9.1.2.dev30355.dist-info → udata-9.1.2.dev30382.dist-info}/top_level.txt +0 -0
udata/frontend/markdown.py
CHANGED
|
@@ -1,33 +1,29 @@
|
|
|
1
|
+
import re
|
|
1
2
|
from functools import partial
|
|
2
3
|
from urllib.parse import urlparse
|
|
3
4
|
|
|
4
5
|
import bleach
|
|
5
6
|
import html2text
|
|
6
7
|
import mistune
|
|
7
|
-
import re
|
|
8
|
-
|
|
9
8
|
from bleach.linkifier import LinkifyFilter
|
|
10
|
-
from flask import
|
|
9
|
+
from flask import Markup, current_app, request
|
|
10
|
+
from jinja2.filters import do_striptags, do_truncate
|
|
11
11
|
from werkzeug.local import LocalProxy
|
|
12
|
-
from jinja2.filters import do_truncate, do_striptags
|
|
13
12
|
|
|
14
13
|
from udata.i18n import _
|
|
15
14
|
|
|
15
|
+
md = LocalProxy(lambda: current_app.extensions["markdown"])
|
|
16
16
|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
EXCERPT_TOKEN = '<!--- --- -->'
|
|
17
|
+
EXCERPT_TOKEN = "<!--- --- -->"
|
|
20
18
|
|
|
21
|
-
RE_AUTOLINK = re.compile(
|
|
22
|
-
r'<([A-Za-z][A-Za-z0-9.+-]{1,31}:[^<>\x00-\x20]*)>',
|
|
23
|
-
re.IGNORECASE)
|
|
19
|
+
RE_AUTOLINK = re.compile(r"<([A-Za-z][A-Za-z0-9.+-]{1,31}:[^<>\x00-\x20]*)>", re.IGNORECASE)
|
|
24
20
|
|
|
25
21
|
|
|
26
22
|
def source_tooltip_callback(attrs, new=False):
|
|
27
23
|
"""
|
|
28
24
|
Add a `data-tooltip` attribute with `Source` content for embeds.
|
|
29
25
|
"""
|
|
30
|
-
attrs[(None,
|
|
26
|
+
attrs[(None, "data-tooltip")] = _("Source")
|
|
31
27
|
return attrs
|
|
32
28
|
|
|
33
29
|
|
|
@@ -39,31 +35,29 @@ def nofollow_callback(attrs, new=False):
|
|
|
39
35
|
That callback is not splitted in order to parse the URL only once.
|
|
40
36
|
"""
|
|
41
37
|
|
|
42
|
-
if (None,
|
|
38
|
+
if (None, "href") not in attrs:
|
|
43
39
|
return attrs
|
|
44
|
-
parsed_url = urlparse(attrs[(None,
|
|
45
|
-
if parsed_url.netloc in (
|
|
40
|
+
parsed_url = urlparse(attrs[(None, "href")])
|
|
41
|
+
if parsed_url.netloc in ("", current_app.config["SERVER_NAME"]):
|
|
46
42
|
path = parsed_url.path
|
|
47
|
-
attrs[(None,
|
|
48
|
-
scheme=
|
|
49
|
-
netloc=current_app.config[
|
|
50
|
-
path=path if path.startswith(
|
|
51
|
-
query=
|
|
43
|
+
attrs[(None, "href")] = "{scheme}://{netloc}{path}{query}".format(
|
|
44
|
+
scheme="https" if request.is_secure else "http",
|
|
45
|
+
netloc=current_app.config["SERVER_NAME"],
|
|
46
|
+
path=path if path.startswith("/") else f"/{path}",
|
|
47
|
+
query="?" + parsed_url.query if parsed_url.query else "",
|
|
48
|
+
)
|
|
52
49
|
return attrs
|
|
53
50
|
else:
|
|
54
|
-
rel = [x for x in attrs.get((None,
|
|
55
|
-
if
|
|
56
|
-
rel.append(
|
|
57
|
-
attrs[(None,
|
|
51
|
+
rel = [x for x in attrs.get((None, "rel"), "").split(" ") if x]
|
|
52
|
+
if "nofollow" not in [x.lower() for x in rel]:
|
|
53
|
+
rel.append("nofollow")
|
|
54
|
+
attrs[(None, "rel")] = " ".join(rel)
|
|
58
55
|
return attrs
|
|
59
56
|
|
|
60
57
|
|
|
61
58
|
class Renderer(mistune.Renderer):
|
|
62
59
|
def table(self, header, body):
|
|
63
|
-
return (
|
|
64
|
-
'<table>\n<thead>\n%s</thead>\n'
|
|
65
|
-
'<tbody>\n%s</tbody>\n</table>\n'
|
|
66
|
-
) % (header, body)
|
|
60
|
+
return ("<table>\n<thead>\n%s</thead>\n" "<tbody>\n%s</tbody>\n</table>\n") % (header, body)
|
|
67
61
|
|
|
68
62
|
|
|
69
63
|
class UdataCleaner(bleach.Cleaner):
|
|
@@ -73,29 +67,31 @@ class UdataCleaner(bleach.Cleaner):
|
|
|
73
67
|
callbacks.append(source_tooltip_callback)
|
|
74
68
|
|
|
75
69
|
super().__init__(
|
|
76
|
-
tags=current_app.config[
|
|
77
|
-
attributes=current_app.config[
|
|
78
|
-
styles=current_app.config[
|
|
79
|
-
protocols=current_app.config[
|
|
70
|
+
tags=current_app.config["MD_ALLOWED_TAGS"],
|
|
71
|
+
attributes=current_app.config["MD_ALLOWED_ATTRIBUTES"],
|
|
72
|
+
styles=current_app.config["MD_ALLOWED_STYLES"],
|
|
73
|
+
protocols=current_app.config["MD_ALLOWED_PROTOCOLS"],
|
|
80
74
|
strip_comments=False,
|
|
81
|
-
filters=[
|
|
82
|
-
|
|
75
|
+
filters=[
|
|
76
|
+
partial(LinkifyFilter, skip_tags=["pre"], parse_email=False, callbacks=callbacks)
|
|
77
|
+
],
|
|
78
|
+
)
|
|
83
79
|
|
|
84
80
|
|
|
85
81
|
class UDataMarkdown(object):
|
|
86
82
|
"""Consistent with Flask's extensions signature."""
|
|
87
83
|
|
|
88
84
|
def __init__(self, app):
|
|
89
|
-
app.jinja_env.filters.setdefault(
|
|
85
|
+
app.jinja_env.filters.setdefault("markdown", self.__call__)
|
|
90
86
|
renderer = Renderer(escape=False, hard_wrap=True)
|
|
91
87
|
self.markdown = mistune.Markdown(renderer=renderer)
|
|
92
88
|
|
|
93
89
|
def __call__(self, stream, source_tooltip=False, wrap=True):
|
|
94
90
|
if not stream:
|
|
95
|
-
return
|
|
91
|
+
return ""
|
|
96
92
|
|
|
97
93
|
# Prepare angle bracket autolinks to avoid bleach treating them as tag
|
|
98
|
-
stream = RE_AUTOLINK.sub(r
|
|
94
|
+
stream = RE_AUTOLINK.sub(r"[\g<1>](\g<1>)", stream)
|
|
99
95
|
# Turn markdown to HTML.
|
|
100
96
|
html = self.markdown(stream)
|
|
101
97
|
|
|
@@ -108,15 +104,15 @@ class UDataMarkdown(object):
|
|
|
108
104
|
return Markup(html)
|
|
109
105
|
|
|
110
106
|
|
|
111
|
-
def mdstrip(value, length=None, end=
|
|
112
|
-
|
|
107
|
+
def mdstrip(value, length=None, end="…"):
|
|
108
|
+
"""
|
|
113
109
|
Truncate and strip tags from a markdown source
|
|
114
110
|
|
|
115
111
|
The markdown source is truncated at the excerpt if present and
|
|
116
112
|
smaller than the required length. Then, all html tags are stripped.
|
|
117
|
-
|
|
113
|
+
"""
|
|
118
114
|
if not value:
|
|
119
|
-
return
|
|
115
|
+
return ""
|
|
120
116
|
if EXCERPT_TOKEN in value:
|
|
121
117
|
value = value.split(EXCERPT_TOKEN, 1)[0]
|
|
122
118
|
rendered = md(value, wrap=False)
|
|
@@ -127,14 +123,14 @@ def mdstrip(value, length=None, end='…'):
|
|
|
127
123
|
|
|
128
124
|
|
|
129
125
|
def parse_html(html):
|
|
130
|
-
|
|
126
|
+
"""
|
|
131
127
|
Parse HTML and convert it into a udata-compatible markdown string
|
|
132
|
-
|
|
128
|
+
"""
|
|
133
129
|
if not html:
|
|
134
|
-
return
|
|
130
|
+
return ""
|
|
135
131
|
return html2text.html2text(html.strip(), bodywidth=0).strip()
|
|
136
132
|
|
|
137
133
|
|
|
138
134
|
def init_app(app):
|
|
139
|
-
app.extensions[
|
|
135
|
+
app.extensions["markdown"] = UDataMarkdown(app)
|
|
140
136
|
app.add_template_filter(mdstrip)
|
udata/harvest/actions.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
|
-
import logging
|
|
2
1
|
import csv
|
|
3
|
-
|
|
2
|
+
import logging
|
|
4
3
|
from collections import namedtuple
|
|
5
4
|
from datetime import datetime, timedelta
|
|
6
5
|
|
|
@@ -9,13 +8,17 @@ from flask import current_app
|
|
|
9
8
|
|
|
10
9
|
from udata.auth import current_user
|
|
11
10
|
from udata.core.dataset.models import HarvestDatasetMetadata
|
|
12
|
-
from udata.models import
|
|
11
|
+
from udata.models import Dataset, Organization, PeriodicTask, User
|
|
13
12
|
from udata.storage.s3 import delete_file
|
|
14
13
|
|
|
15
14
|
from . import backends, signals
|
|
16
15
|
from .models import (
|
|
17
|
-
|
|
18
|
-
VALIDATION_ACCEPTED,
|
|
16
|
+
DEFAULT_HARVEST_FREQUENCY,
|
|
17
|
+
VALIDATION_ACCEPTED,
|
|
18
|
+
VALIDATION_REFUSED,
|
|
19
|
+
HarvestJob,
|
|
20
|
+
HarvestSource,
|
|
21
|
+
archive_harvested_dataset,
|
|
19
22
|
)
|
|
20
23
|
from .tasks import harvest
|
|
21
24
|
|
|
@@ -25,7 +28,7 @@ DEFAULT_PAGE_SIZE = 10
|
|
|
25
28
|
|
|
26
29
|
|
|
27
30
|
def list_backends():
|
|
28
|
-
|
|
31
|
+
"""List all available backends"""
|
|
29
32
|
return backends.get_all(current_app).values()
|
|
30
33
|
|
|
31
34
|
|
|
@@ -39,37 +42,40 @@ def _sources_queryset(owner=None, deleted=False):
|
|
|
39
42
|
|
|
40
43
|
|
|
41
44
|
def list_sources(owner=None, deleted=False):
|
|
42
|
-
|
|
45
|
+
"""List all harvest sources"""
|
|
43
46
|
return list(_sources_queryset(owner=owner, deleted=deleted))
|
|
44
47
|
|
|
45
48
|
|
|
46
49
|
def paginate_sources(owner=None, page=1, page_size=DEFAULT_PAGE_SIZE, deleted=False):
|
|
47
|
-
|
|
50
|
+
"""Paginate harvest sources"""
|
|
48
51
|
sources = _sources_queryset(owner=owner, deleted=deleted)
|
|
49
52
|
page = max(page or 1, 1)
|
|
50
53
|
return sources.paginate(page, page_size)
|
|
51
54
|
|
|
52
55
|
|
|
53
56
|
def get_source(ident):
|
|
54
|
-
|
|
57
|
+
"""Get an harvest source given its ID or its slug"""
|
|
55
58
|
return HarvestSource.get(ident)
|
|
56
59
|
|
|
57
60
|
|
|
58
61
|
def get_job(ident):
|
|
59
|
-
|
|
62
|
+
"""Get an harvest job given its ID"""
|
|
60
63
|
return HarvestJob.objects.get(id=ident)
|
|
61
64
|
|
|
62
65
|
|
|
63
|
-
def create_source(
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
66
|
+
def create_source(
|
|
67
|
+
name,
|
|
68
|
+
url,
|
|
69
|
+
backend,
|
|
70
|
+
description=None,
|
|
71
|
+
frequency=DEFAULT_HARVEST_FREQUENCY,
|
|
72
|
+
owner=None,
|
|
73
|
+
organization=None,
|
|
74
|
+
config=None,
|
|
75
|
+
active=None,
|
|
76
|
+
autoarchive=None,
|
|
77
|
+
):
|
|
78
|
+
"""Create a new harvest source"""
|
|
73
79
|
if owner and not isinstance(owner, User):
|
|
74
80
|
owner = User.get(owner)
|
|
75
81
|
|
|
@@ -93,7 +99,7 @@ def create_source(name, url, backend,
|
|
|
93
99
|
|
|
94
100
|
|
|
95
101
|
def update_source(ident, data):
|
|
96
|
-
|
|
102
|
+
"""Update an harvest source"""
|
|
97
103
|
source = get_source(ident)
|
|
98
104
|
source.modify(**data)
|
|
99
105
|
signals.harvest_source_updated.send(source)
|
|
@@ -101,7 +107,7 @@ def update_source(ident, data):
|
|
|
101
107
|
|
|
102
108
|
|
|
103
109
|
def validate_source(ident, comment=None):
|
|
104
|
-
|
|
110
|
+
"""Validate a source for automatic harvesting"""
|
|
105
111
|
source = get_source(ident)
|
|
106
112
|
source.validation.on = datetime.utcnow()
|
|
107
113
|
source.validation.comment = comment
|
|
@@ -109,13 +115,13 @@ def validate_source(ident, comment=None):
|
|
|
109
115
|
if current_user.is_authenticated:
|
|
110
116
|
source.validation.by = current_user._get_current_object()
|
|
111
117
|
source.save()
|
|
112
|
-
schedule(ident, cron=current_app.config[
|
|
118
|
+
schedule(ident, cron=current_app.config["HARVEST_DEFAULT_SCHEDULE"])
|
|
113
119
|
launch(ident)
|
|
114
120
|
return source
|
|
115
121
|
|
|
116
122
|
|
|
117
123
|
def reject_source(ident, comment):
|
|
118
|
-
|
|
124
|
+
"""Reject a source for automatic harvesting"""
|
|
119
125
|
source = get_source(ident)
|
|
120
126
|
source.validation.on = datetime.utcnow()
|
|
121
127
|
source.validation.comment = comment
|
|
@@ -127,7 +133,7 @@ def reject_source(ident, comment):
|
|
|
127
133
|
|
|
128
134
|
|
|
129
135
|
def delete_source(ident):
|
|
130
|
-
|
|
136
|
+
"""Delete an harvest source"""
|
|
131
137
|
source = get_source(ident)
|
|
132
138
|
source.deleted = datetime.utcnow()
|
|
133
139
|
source.save()
|
|
@@ -136,7 +142,7 @@ def delete_source(ident):
|
|
|
136
142
|
|
|
137
143
|
|
|
138
144
|
def clean_source(ident):
|
|
139
|
-
|
|
145
|
+
"""Deletes all datasets linked to a harvest source"""
|
|
140
146
|
source = get_source(ident)
|
|
141
147
|
datasets = Dataset.objects.filter(harvest__source_id=str(source.id))
|
|
142
148
|
for dataset in datasets:
|
|
@@ -146,7 +152,7 @@ def clean_source(ident):
|
|
|
146
152
|
|
|
147
153
|
|
|
148
154
|
def purge_sources():
|
|
149
|
-
|
|
155
|
+
"""Permanently remove sources flagged as deleted"""
|
|
150
156
|
sources = HarvestSource.objects(deleted__exists=True)
|
|
151
157
|
count = sources.count()
|
|
152
158
|
for source in sources:
|
|
@@ -154,30 +160,34 @@ def purge_sources():
|
|
|
154
160
|
source.periodic_task.delete()
|
|
155
161
|
datasets = Dataset.objects.filter(harvest__source_id=str(source.id))
|
|
156
162
|
for dataset in datasets:
|
|
157
|
-
archive_harvested_dataset(dataset, reason=
|
|
163
|
+
archive_harvested_dataset(dataset, reason="harvester-deleted", dryrun=False)
|
|
158
164
|
source.delete()
|
|
159
165
|
return count
|
|
160
166
|
|
|
161
167
|
|
|
162
168
|
def purge_jobs():
|
|
163
|
-
|
|
164
|
-
retention = current_app.config[
|
|
169
|
+
"""Delete jobs older than retention policy"""
|
|
170
|
+
retention = current_app.config["HARVEST_JOBS_RETENTION_DAYS"]
|
|
165
171
|
expiration = datetime.utcnow() - timedelta(days=retention)
|
|
166
172
|
|
|
167
|
-
jobs_with_external_files = HarvestJob.objects(
|
|
173
|
+
jobs_with_external_files = HarvestJob.objects(
|
|
174
|
+
data__filename__exists=True, created__lt=expiration
|
|
175
|
+
)
|
|
168
176
|
for job in jobs_with_external_files:
|
|
169
|
-
bucket = current_app.config.get(
|
|
177
|
+
bucket = current_app.config.get("HARVEST_GRAPHS_S3_BUCKET")
|
|
170
178
|
if bucket is None:
|
|
171
|
-
log.error(
|
|
179
|
+
log.error(
|
|
180
|
+
f"Bucket isn't configured anymore, but jobs still exist with external filenames. Could not delete them."
|
|
181
|
+
)
|
|
172
182
|
break
|
|
173
183
|
|
|
174
|
-
delete_file(bucket, job.data[
|
|
184
|
+
delete_file(bucket, job.data["filename"])
|
|
175
185
|
|
|
176
186
|
return HarvestJob.objects(created__lt=expiration).delete()
|
|
177
187
|
|
|
178
188
|
|
|
179
189
|
def run(ident):
|
|
180
|
-
|
|
190
|
+
"""Launch or resume an harvesting for a given source if none is running"""
|
|
181
191
|
source = get_source(ident)
|
|
182
192
|
cls = backends.get(current_app, source.backend)
|
|
183
193
|
backend = cls(source)
|
|
@@ -185,29 +195,32 @@ def run(ident):
|
|
|
185
195
|
|
|
186
196
|
|
|
187
197
|
def launch(ident):
|
|
188
|
-
|
|
198
|
+
"""Launch or resume an harvesting for a given source if none is running"""
|
|
189
199
|
return harvest.delay(ident)
|
|
190
200
|
|
|
191
201
|
|
|
192
202
|
def preview(ident):
|
|
193
|
-
|
|
203
|
+
"""Preview an harvesting for a given source"""
|
|
194
204
|
source = get_source(ident)
|
|
195
205
|
cls = backends.get(current_app, source.backend)
|
|
196
|
-
max_items = current_app.config[
|
|
206
|
+
max_items = current_app.config["HARVEST_PREVIEW_MAX_ITEMS"]
|
|
197
207
|
backend = cls(source, dryrun=True, max_items=max_items)
|
|
198
208
|
return backend.harvest()
|
|
199
209
|
|
|
200
210
|
|
|
201
|
-
def preview_from_config(
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
+
def preview_from_config(
|
|
212
|
+
name,
|
|
213
|
+
url,
|
|
214
|
+
backend,
|
|
215
|
+
description=None,
|
|
216
|
+
frequency=DEFAULT_HARVEST_FREQUENCY,
|
|
217
|
+
owner=None,
|
|
218
|
+
organization=None,
|
|
219
|
+
config=None,
|
|
220
|
+
active=None,
|
|
221
|
+
autoarchive=None,
|
|
222
|
+
):
|
|
223
|
+
"""Preview an harvesting from a source created with the given parameters"""
|
|
211
224
|
if owner and not isinstance(owner, User):
|
|
212
225
|
owner = User.get(owner)
|
|
213
226
|
|
|
@@ -227,14 +240,15 @@ def preview_from_config(name, url, backend,
|
|
|
227
240
|
autoarchive=autoarchive,
|
|
228
241
|
)
|
|
229
242
|
cls = backends.get(current_app, source.backend)
|
|
230
|
-
max_items = current_app.config[
|
|
243
|
+
max_items = current_app.config["HARVEST_PREVIEW_MAX_ITEMS"]
|
|
231
244
|
backend = cls(source, dryrun=True, max_items=max_items)
|
|
232
245
|
return backend.harvest()
|
|
233
246
|
|
|
234
247
|
|
|
235
|
-
def schedule(
|
|
236
|
-
|
|
237
|
-
|
|
248
|
+
def schedule(
|
|
249
|
+
ident, cron=None, minute="*", hour="*", day_of_week="*", day_of_month="*", month_of_year="*"
|
|
250
|
+
):
|
|
251
|
+
"""Schedule an harvesting on a source given a crontab"""
|
|
238
252
|
source = get_source(ident)
|
|
239
253
|
|
|
240
254
|
if cron:
|
|
@@ -245,29 +259,31 @@ def schedule(ident, cron=None, minute='*', hour='*',
|
|
|
245
259
|
hour=str(hour),
|
|
246
260
|
day_of_week=str(day_of_week),
|
|
247
261
|
day_of_month=str(day_of_month),
|
|
248
|
-
month_of_year=str(month_of_year)
|
|
262
|
+
month_of_year=str(month_of_year),
|
|
249
263
|
)
|
|
250
264
|
|
|
251
265
|
if source.periodic_task:
|
|
252
266
|
source.periodic_task.modify(crontab=crontab)
|
|
253
267
|
else:
|
|
254
|
-
source.modify(
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
268
|
+
source.modify(
|
|
269
|
+
periodic_task=PeriodicTask.objects.create(
|
|
270
|
+
task="harvest",
|
|
271
|
+
name="Harvest {0}".format(source.name),
|
|
272
|
+
description="Periodic Harvesting",
|
|
273
|
+
enabled=True,
|
|
274
|
+
args=[str(source.id)],
|
|
275
|
+
crontab=crontab,
|
|
276
|
+
)
|
|
277
|
+
)
|
|
262
278
|
signals.harvest_source_scheduled.send(source)
|
|
263
279
|
return source
|
|
264
280
|
|
|
265
281
|
|
|
266
282
|
def unschedule(ident):
|
|
267
|
-
|
|
283
|
+
"""Unschedule an harvesting on a source"""
|
|
268
284
|
source = get_source(ident)
|
|
269
285
|
if not source.periodic_task:
|
|
270
|
-
msg =
|
|
286
|
+
msg = "Harvesting on source {0} is ot scheduled".format(source.name)
|
|
271
287
|
raise ValueError(msg)
|
|
272
288
|
|
|
273
289
|
source.periodic_task.delete()
|
|
@@ -275,11 +291,11 @@ def unschedule(ident):
|
|
|
275
291
|
return source
|
|
276
292
|
|
|
277
293
|
|
|
278
|
-
AttachResult = namedtuple(
|
|
294
|
+
AttachResult = namedtuple("AttachResult", ["success", "errors"])
|
|
279
295
|
|
|
280
296
|
|
|
281
297
|
def attach(domain, filename):
|
|
282
|
-
|
|
298
|
+
"""Attach existing dataset to their harvest remote id before harvesting.
|
|
283
299
|
|
|
284
300
|
The expected csv file format is the following:
|
|
285
301
|
|
|
@@ -288,32 +304,28 @@ def attach(domain, filename):
|
|
|
288
304
|
|
|
289
305
|
The delimiter should be ";". columns order
|
|
290
306
|
and extras columns does not matter
|
|
291
|
-
|
|
307
|
+
"""
|
|
292
308
|
count = 0
|
|
293
309
|
errors = 0
|
|
294
310
|
with open(filename) as csvfile:
|
|
295
|
-
reader = csv.DictReader(csvfile, delimiter=
|
|
311
|
+
reader = csv.DictReader(csvfile, delimiter=";", quotechar='"')
|
|
296
312
|
for row in reader:
|
|
297
313
|
try:
|
|
298
|
-
dataset = Dataset.objects.get(id=ObjectId(row[
|
|
314
|
+
dataset = Dataset.objects.get(id=ObjectId(row["local"]))
|
|
299
315
|
except: # noqa (Never stop on failure)
|
|
300
|
-
log.warning(
|
|
316
|
+
log.warning("Unable to attach dataset : %s", row["local"])
|
|
301
317
|
errors += 1
|
|
302
318
|
continue
|
|
303
319
|
|
|
304
320
|
# Detach previously attached dataset
|
|
305
|
-
Dataset.objects(
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
}).update(**{
|
|
309
|
-
'unset__harvest__domain': True,
|
|
310
|
-
'unset__harvest__remote_id': True
|
|
311
|
-
})
|
|
321
|
+
Dataset.objects(
|
|
322
|
+
**{"harvest__domain": domain, "harvest__remote_id": row["remote"]}
|
|
323
|
+
).update(**{"unset__harvest__domain": True, "unset__harvest__remote_id": True})
|
|
312
324
|
|
|
313
325
|
if not dataset.harvest:
|
|
314
326
|
dataset.harvest = HarvestDatasetMetadata()
|
|
315
327
|
dataset.harvest.domain = domain
|
|
316
|
-
dataset.harvest.remote_id = row[
|
|
328
|
+
dataset.harvest.remote_id = row["remote"]
|
|
317
329
|
|
|
318
330
|
dataset.last_modified_internal = datetime.utcnow()
|
|
319
331
|
dataset.save()
|