udata 9.1.2.dev30355__py2.py3-none-any.whl → 9.1.2.dev30382__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of udata might be problematic.
- tasks/__init__.py +109 -107
- tasks/helpers.py +18 -18
- udata/__init__.py +4 -4
- udata/admin/views.py +5 -5
- udata/api/__init__.py +135 -124
- udata/api/commands.py +45 -37
- udata/api/errors.py +5 -4
- udata/api/fields.py +23 -21
- udata/api/oauth2.py +55 -74
- udata/api/parsers.py +15 -15
- udata/api/signals.py +1 -1
- udata/api_fields.py +137 -89
- udata/app.py +56 -54
- udata/assets.py +5 -5
- udata/auth/__init__.py +37 -26
- udata/auth/forms.py +23 -15
- udata/auth/helpers.py +1 -1
- udata/auth/mails.py +3 -3
- udata/auth/password_validation.py +19 -15
- udata/auth/views.py +94 -68
- udata/commands/__init__.py +71 -69
- udata/commands/cache.py +7 -7
- udata/commands/db.py +201 -140
- udata/commands/dcat.py +36 -30
- udata/commands/fixtures.py +100 -84
- udata/commands/images.py +21 -20
- udata/commands/info.py +17 -20
- udata/commands/init.py +10 -10
- udata/commands/purge.py +12 -13
- udata/commands/serve.py +41 -29
- udata/commands/static.py +16 -18
- udata/commands/test.py +20 -20
- udata/commands/tests/fixtures.py +26 -24
- udata/commands/worker.py +31 -33
- udata/core/__init__.py +12 -12
- udata/core/activity/__init__.py +0 -1
- udata/core/activity/api.py +59 -49
- udata/core/activity/models.py +28 -26
- udata/core/activity/signals.py +1 -1
- udata/core/activity/tasks.py +16 -10
- udata/core/badges/api.py +6 -6
- udata/core/badges/commands.py +14 -13
- udata/core/badges/fields.py +8 -5
- udata/core/badges/forms.py +7 -4
- udata/core/badges/models.py +16 -31
- udata/core/badges/permissions.py +1 -3
- udata/core/badges/signals.py +2 -2
- udata/core/badges/tasks.py +3 -2
- udata/core/badges/tests/test_commands.py +10 -10
- udata/core/badges/tests/test_model.py +24 -31
- udata/core/contact_point/api.py +19 -18
- udata/core/contact_point/api_fields.py +21 -14
- udata/core/contact_point/factories.py +2 -2
- udata/core/contact_point/forms.py +7 -6
- udata/core/contact_point/models.py +3 -5
- udata/core/dataservices/api.py +26 -21
- udata/core/dataservices/factories.py +13 -11
- udata/core/dataservices/models.py +35 -40
- udata/core/dataservices/permissions.py +4 -4
- udata/core/dataservices/rdf.py +40 -17
- udata/core/dataservices/tasks.py +4 -3
- udata/core/dataset/actions.py +10 -10
- udata/core/dataset/activities.py +21 -23
- udata/core/dataset/api.py +321 -298
- udata/core/dataset/api_fields.py +443 -271
- udata/core/dataset/apiv2.py +305 -229
- udata/core/dataset/commands.py +38 -36
- udata/core/dataset/constants.py +61 -54
- udata/core/dataset/csv.py +70 -74
- udata/core/dataset/events.py +39 -32
- udata/core/dataset/exceptions.py +8 -4
- udata/core/dataset/factories.py +57 -65
- udata/core/dataset/forms.py +87 -63
- udata/core/dataset/models.py +336 -280
- udata/core/dataset/permissions.py +9 -6
- udata/core/dataset/preview.py +15 -17
- udata/core/dataset/rdf.py +156 -122
- udata/core/dataset/search.py +92 -77
- udata/core/dataset/signals.py +1 -1
- udata/core/dataset/tasks.py +63 -54
- udata/core/discussions/actions.py +5 -5
- udata/core/discussions/api.py +124 -120
- udata/core/discussions/factories.py +2 -2
- udata/core/discussions/forms.py +9 -7
- udata/core/discussions/metrics.py +1 -3
- udata/core/discussions/models.py +25 -24
- udata/core/discussions/notifications.py +18 -14
- udata/core/discussions/permissions.py +3 -3
- udata/core/discussions/signals.py +4 -4
- udata/core/discussions/tasks.py +24 -28
- udata/core/followers/api.py +32 -33
- udata/core/followers/models.py +9 -9
- udata/core/followers/signals.py +3 -3
- udata/core/jobs/actions.py +7 -7
- udata/core/jobs/api.py +99 -92
- udata/core/jobs/commands.py +48 -49
- udata/core/jobs/forms.py +11 -11
- udata/core/jobs/models.py +6 -6
- udata/core/metrics/__init__.py +2 -2
- udata/core/metrics/commands.py +34 -30
- udata/core/metrics/models.py +2 -4
- udata/core/metrics/signals.py +1 -1
- udata/core/metrics/tasks.py +3 -3
- udata/core/organization/activities.py +12 -15
- udata/core/organization/api.py +167 -174
- udata/core/organization/api_fields.py +183 -124
- udata/core/organization/apiv2.py +32 -32
- udata/core/organization/commands.py +20 -22
- udata/core/organization/constants.py +11 -11
- udata/core/organization/csv.py +17 -15
- udata/core/organization/factories.py +8 -11
- udata/core/organization/forms.py +32 -26
- udata/core/organization/metrics.py +2 -1
- udata/core/organization/models.py +87 -67
- udata/core/organization/notifications.py +18 -14
- udata/core/organization/permissions.py +10 -11
- udata/core/organization/rdf.py +14 -14
- udata/core/organization/search.py +30 -28
- udata/core/organization/signals.py +7 -7
- udata/core/organization/tasks.py +42 -61
- udata/core/owned.py +38 -27
- udata/core/post/api.py +82 -81
- udata/core/post/constants.py +8 -5
- udata/core/post/factories.py +4 -4
- udata/core/post/forms.py +13 -14
- udata/core/post/models.py +20 -22
- udata/core/post/tests/test_api.py +30 -32
- udata/core/reports/api.py +8 -7
- udata/core/reports/constants.py +1 -3
- udata/core/reports/models.py +10 -10
- udata/core/reuse/activities.py +15 -19
- udata/core/reuse/api.py +123 -126
- udata/core/reuse/api_fields.py +120 -85
- udata/core/reuse/apiv2.py +11 -10
- udata/core/reuse/constants.py +23 -23
- udata/core/reuse/csv.py +18 -18
- udata/core/reuse/factories.py +5 -9
- udata/core/reuse/forms.py +24 -21
- udata/core/reuse/models.py +55 -51
- udata/core/reuse/permissions.py +2 -2
- udata/core/reuse/search.py +49 -46
- udata/core/reuse/signals.py +1 -1
- udata/core/reuse/tasks.py +4 -5
- udata/core/site/api.py +47 -50
- udata/core/site/factories.py +2 -2
- udata/core/site/forms.py +4 -5
- udata/core/site/models.py +94 -63
- udata/core/site/rdf.py +14 -14
- udata/core/spam/api.py +16 -9
- udata/core/spam/constants.py +4 -4
- udata/core/spam/fields.py +13 -7
- udata/core/spam/models.py +27 -20
- udata/core/spam/signals.py +1 -1
- udata/core/spam/tests/test_spam.py +6 -5
- udata/core/spatial/api.py +72 -80
- udata/core/spatial/api_fields.py +73 -58
- udata/core/spatial/commands.py +67 -64
- udata/core/spatial/constants.py +3 -3
- udata/core/spatial/factories.py +37 -54
- udata/core/spatial/forms.py +27 -26
- udata/core/spatial/geoids.py +17 -17
- udata/core/spatial/models.py +43 -47
- udata/core/spatial/tasks.py +2 -1
- udata/core/spatial/tests/test_api.py +115 -130
- udata/core/spatial/tests/test_fields.py +74 -77
- udata/core/spatial/tests/test_geoid.py +22 -22
- udata/core/spatial/tests/test_models.py +5 -7
- udata/core/spatial/translations.py +16 -16
- udata/core/storages/__init__.py +16 -18
- udata/core/storages/api.py +66 -64
- udata/core/storages/tasks.py +7 -7
- udata/core/storages/utils.py +15 -15
- udata/core/storages/views.py +5 -6
- udata/core/tags/api.py +17 -14
- udata/core/tags/csv.py +4 -4
- udata/core/tags/models.py +8 -5
- udata/core/tags/tasks.py +11 -13
- udata/core/tags/views.py +4 -4
- udata/core/topic/api.py +84 -73
- udata/core/topic/apiv2.py +157 -127
- udata/core/topic/factories.py +3 -4
- udata/core/topic/forms.py +12 -14
- udata/core/topic/models.py +14 -19
- udata/core/topic/parsers.py +26 -26
- udata/core/user/activities.py +30 -29
- udata/core/user/api.py +151 -152
- udata/core/user/api_fields.py +132 -100
- udata/core/user/apiv2.py +7 -7
- udata/core/user/commands.py +38 -38
- udata/core/user/factories.py +8 -9
- udata/core/user/forms.py +14 -11
- udata/core/user/metrics.py +2 -2
- udata/core/user/models.py +68 -69
- udata/core/user/permissions.py +4 -5
- udata/core/user/rdf.py +7 -8
- udata/core/user/tasks.py +2 -2
- udata/core/user/tests/test_user_model.py +24 -16
- udata/db/tasks.py +2 -1
- udata/entrypoints.py +35 -31
- udata/errors.py +2 -1
- udata/event/values.py +6 -6
- udata/factories.py +2 -2
- udata/features/identicon/api.py +5 -6
- udata/features/identicon/backends.py +48 -55
- udata/features/identicon/tests/test_backends.py +4 -5
- udata/features/notifications/__init__.py +0 -1
- udata/features/notifications/actions.py +9 -9
- udata/features/notifications/api.py +17 -13
- udata/features/territories/__init__.py +12 -10
- udata/features/territories/api.py +14 -15
- udata/features/territories/models.py +23 -28
- udata/features/transfer/actions.py +8 -11
- udata/features/transfer/api.py +84 -77
- udata/features/transfer/factories.py +2 -1
- udata/features/transfer/models.py +11 -12
- udata/features/transfer/notifications.py +19 -15
- udata/features/transfer/permissions.py +5 -5
- udata/forms/__init__.py +5 -2
- udata/forms/fields.py +164 -172
- udata/forms/validators.py +19 -22
- udata/forms/widgets.py +9 -13
- udata/frontend/__init__.py +31 -26
- udata/frontend/csv.py +68 -58
- udata/frontend/markdown.py +40 -44
- udata/harvest/actions.py +89 -77
- udata/harvest/api.py +294 -238
- udata/harvest/backends/__init__.py +4 -4
- udata/harvest/backends/base.py +128 -111
- udata/harvest/backends/dcat.py +80 -66
- udata/harvest/commands.py +56 -60
- udata/harvest/csv.py +8 -8
- udata/harvest/exceptions.py +6 -3
- udata/harvest/filters.py +24 -23
- udata/harvest/forms.py +27 -28
- udata/harvest/models.py +88 -80
- udata/harvest/notifications.py +15 -10
- udata/harvest/signals.py +13 -13
- udata/harvest/tasks.py +11 -10
- udata/harvest/tests/factories.py +23 -24
- udata/harvest/tests/test_actions.py +136 -166
- udata/harvest/tests/test_api.py +220 -214
- udata/harvest/tests/test_base_backend.py +117 -112
- udata/harvest/tests/test_dcat_backend.py +380 -308
- udata/harvest/tests/test_filters.py +33 -22
- udata/harvest/tests/test_models.py +11 -14
- udata/harvest/tests/test_notifications.py +6 -7
- udata/harvest/tests/test_tasks.py +7 -6
- udata/i18n.py +237 -78
- udata/linkchecker/backends.py +5 -11
- udata/linkchecker/checker.py +23 -22
- udata/linkchecker/commands.py +4 -6
- udata/linkchecker/models.py +6 -6
- udata/linkchecker/tasks.py +18 -20
- udata/mail.py +21 -21
- udata/migrations/2020-07-24-remove-s-from-scope-oauth.py +9 -8
- udata/migrations/2020-08-24-add-fs-filename.py +9 -8
- udata/migrations/2020-09-28-update-reuses-datasets-metrics.py +5 -4
- udata/migrations/2020-10-16-migrate-ods-resources.py +9 -10
- udata/migrations/2021-04-08-update-schema-with-new-structure.py +8 -7
- udata/migrations/2021-05-27-fix-default-schema-name.py +7 -6
- udata/migrations/2021-07-05-remove-unused-badges.py +17 -15
- udata/migrations/2021-07-07-update-schema-for-community-resources.py +7 -6
- udata/migrations/2021-08-17-follow-integrity.py +5 -4
- udata/migrations/2021-08-17-harvest-integrity.py +13 -12
- udata/migrations/2021-08-17-oauth2client-integrity.py +5 -4
- udata/migrations/2021-08-17-transfer-integrity.py +5 -4
- udata/migrations/2021-08-17-users-integrity.py +9 -8
- udata/migrations/2021-12-14-reuse-topics.py +7 -6
- udata/migrations/2022-04-21-improve-extension-detection.py +8 -7
- udata/migrations/2022-09-22-clean-inactive-harvest-datasets.py +16 -14
- udata/migrations/2022-10-10-add-fs_uniquifier-to-user-model.py +6 -6
- udata/migrations/2022-10-10-migrate-harvest-extras.py +36 -26
- udata/migrations/2023-02-08-rename-internal-dates.py +46 -28
- udata/migrations/2024-01-29-fix-reuse-and-dataset-with-private-None.py +10 -8
- udata/migrations/2024-03-22-migrate-activity-kwargs-to-extras.py +6 -4
- udata/migrations/2024-06-11-fix-reuse-datasets-references.py +7 -6
- udata/migrations/__init__.py +123 -105
- udata/models/__init__.py +4 -4
- udata/mongo/__init__.py +13 -11
- udata/mongo/badges_field.py +3 -2
- udata/mongo/datetime_fields.py +13 -12
- udata/mongo/document.py +17 -16
- udata/mongo/engine.py +15 -16
- udata/mongo/errors.py +2 -1
- udata/mongo/extras_fields.py +30 -20
- udata/mongo/queryset.py +12 -12
- udata/mongo/slug_fields.py +38 -28
- udata/mongo/taglist_field.py +1 -2
- udata/mongo/url_field.py +5 -5
- udata/mongo/uuid_fields.py +4 -3
- udata/notifications/__init__.py +1 -1
- udata/notifications/mattermost.py +10 -9
- udata/rdf.py +167 -188
- udata/routing.py +40 -45
- udata/search/__init__.py +18 -19
- udata/search/adapter.py +17 -16
- udata/search/commands.py +44 -51
- udata/search/fields.py +13 -20
- udata/search/query.py +23 -18
- udata/search/result.py +9 -10
- udata/sentry.py +21 -19
- udata/settings.py +262 -198
- udata/sitemap.py +8 -6
- udata/static/chunks/{11.e9b9ca1f3e03d4020377.js → 11.52e531c19f8de80c00cf.js} +3 -3
- udata/static/chunks/{11.e9b9ca1f3e03d4020377.js.map → 11.52e531c19f8de80c00cf.js.map} +1 -1
- udata/static/chunks/{13.038c0d9aa0dfa0181c4b.js → 13.c3343a7f1070061c0e10.js} +2 -2
- udata/static/chunks/{13.038c0d9aa0dfa0181c4b.js.map → 13.c3343a7f1070061c0e10.js.map} +1 -1
- udata/static/chunks/{16.0baa2b64a74a2dcde25c.js → 16.8fa42440ad75ca172e6d.js} +2 -2
- udata/static/chunks/{16.0baa2b64a74a2dcde25c.js.map → 16.8fa42440ad75ca172e6d.js.map} +1 -1
- udata/static/chunks/{19.350a9f150b074b4ecefa.js → 19.9c6c8412729cd6d59cfa.js} +3 -3
- udata/static/chunks/{19.350a9f150b074b4ecefa.js.map → 19.9c6c8412729cd6d59cfa.js.map} +1 -1
- udata/static/chunks/{5.6ebbce2b9b3e696d3da5.js → 5.71d15c2e4f21feee2a9a.js} +3 -3
- udata/static/chunks/{5.6ebbce2b9b3e696d3da5.js.map → 5.71d15c2e4f21feee2a9a.js.map} +1 -1
- udata/static/chunks/{6.d8a5f7b017bcbd083641.js → 6.9139dc098b8ea640b890.js} +3 -3
- udata/static/chunks/{6.d8a5f7b017bcbd083641.js.map → 6.9139dc098b8ea640b890.js.map} +1 -1
- udata/static/common.js +1 -1
- udata/static/common.js.map +1 -1
- udata/storage/s3.py +20 -13
- udata/tags.py +4 -5
- udata/tasks.py +43 -42
- udata/tests/__init__.py +9 -6
- udata/tests/api/__init__.py +5 -6
- udata/tests/api/test_auth_api.py +395 -321
- udata/tests/api/test_base_api.py +31 -33
- udata/tests/api/test_contact_points.py +7 -9
- udata/tests/api/test_dataservices_api.py +211 -158
- udata/tests/api/test_datasets_api.py +823 -812
- udata/tests/api/test_follow_api.py +13 -15
- udata/tests/api/test_me_api.py +95 -112
- udata/tests/api/test_organizations_api.py +301 -339
- udata/tests/api/test_reports_api.py +35 -25
- udata/tests/api/test_reuses_api.py +134 -139
- udata/tests/api/test_swagger.py +5 -5
- udata/tests/api/test_tags_api.py +18 -25
- udata/tests/api/test_topics_api.py +94 -94
- udata/tests/api/test_transfer_api.py +53 -48
- udata/tests/api/test_user_api.py +128 -141
- udata/tests/apiv2/test_datasets.py +290 -198
- udata/tests/apiv2/test_me_api.py +10 -11
- udata/tests/apiv2/test_organizations.py +56 -74
- udata/tests/apiv2/test_swagger.py +5 -5
- udata/tests/apiv2/test_topics.py +69 -87
- udata/tests/cli/test_cli_base.py +8 -8
- udata/tests/cli/test_db_cli.py +21 -19
- udata/tests/dataservice/test_dataservice_tasks.py +8 -12
- udata/tests/dataset/test_csv_adapter.py +44 -35
- udata/tests/dataset/test_dataset_actions.py +2 -3
- udata/tests/dataset/test_dataset_commands.py +7 -8
- udata/tests/dataset/test_dataset_events.py +36 -29
- udata/tests/dataset/test_dataset_model.py +224 -217
- udata/tests/dataset/test_dataset_rdf.py +142 -131
- udata/tests/dataset/test_dataset_tasks.py +15 -15
- udata/tests/dataset/test_resource_preview.py +10 -13
- udata/tests/features/territories/__init__.py +9 -13
- udata/tests/features/territories/test_territories_api.py +71 -91
- udata/tests/forms/test_basic_fields.py +7 -7
- udata/tests/forms/test_current_user_field.py +39 -66
- udata/tests/forms/test_daterange_field.py +31 -39
- udata/tests/forms/test_dict_field.py +28 -26
- udata/tests/forms/test_extras_fields.py +102 -76
- udata/tests/forms/test_form_field.py +8 -8
- udata/tests/forms/test_image_field.py +33 -26
- udata/tests/forms/test_model_field.py +134 -123
- udata/tests/forms/test_model_list_field.py +7 -7
- udata/tests/forms/test_nested_model_list_field.py +117 -79
- udata/tests/forms/test_publish_as_field.py +36 -65
- udata/tests/forms/test_reference_field.py +34 -53
- udata/tests/forms/test_user_forms.py +23 -21
- udata/tests/forms/test_uuid_field.py +6 -10
- udata/tests/frontend/__init__.py +9 -6
- udata/tests/frontend/test_auth.py +7 -6
- udata/tests/frontend/test_csv.py +81 -96
- udata/tests/frontend/test_hooks.py +43 -43
- udata/tests/frontend/test_markdown.py +211 -191
- udata/tests/helpers.py +32 -37
- udata/tests/models.py +2 -2
- udata/tests/organization/test_csv_adapter.py +21 -16
- udata/tests/organization/test_notifications.py +11 -18
- udata/tests/organization/test_organization_model.py +13 -13
- udata/tests/organization/test_organization_rdf.py +29 -22
- udata/tests/organization/test_organization_tasks.py +16 -17
- udata/tests/plugin.py +76 -73
- udata/tests/reuse/test_reuse_model.py +21 -21
- udata/tests/reuse/test_reuse_task.py +11 -13
- udata/tests/search/__init__.py +11 -12
- udata/tests/search/test_adapter.py +60 -70
- udata/tests/search/test_query.py +16 -16
- udata/tests/search/test_results.py +10 -7
- udata/tests/site/test_site_api.py +11 -16
- udata/tests/site/test_site_metrics.py +20 -30
- udata/tests/site/test_site_model.py +4 -5
- udata/tests/site/test_site_rdf.py +94 -78
- udata/tests/test_activity.py +17 -17
- udata/tests/test_discussions.py +292 -299
- udata/tests/test_i18n.py +37 -40
- udata/tests/test_linkchecker.py +91 -85
- udata/tests/test_mail.py +13 -17
- udata/tests/test_migrations.py +219 -180
- udata/tests/test_model.py +164 -157
- udata/tests/test_notifications.py +17 -17
- udata/tests/test_owned.py +14 -14
- udata/tests/test_rdf.py +25 -23
- udata/tests/test_routing.py +89 -93
- udata/tests/test_storages.py +137 -128
- udata/tests/test_tags.py +44 -46
- udata/tests/test_topics.py +7 -7
- udata/tests/test_transfer.py +42 -49
- udata/tests/test_uris.py +160 -161
- udata/tests/test_utils.py +79 -71
- udata/tests/user/test_user_rdf.py +5 -9
- udata/tests/workers/test_jobs_commands.py +57 -58
- udata/tests/workers/test_tasks_routing.py +23 -29
- udata/tests/workers/test_workers_api.py +125 -131
- udata/tests/workers/test_workers_helpers.py +6 -6
- udata/tracking.py +4 -6
- udata/uris.py +45 -46
- udata/utils.py +68 -66
- udata/wsgi.py +1 -1
- {udata-9.1.2.dev30355.dist-info → udata-9.1.2.dev30382.dist-info}/METADATA +3 -2
- udata-9.1.2.dev30382.dist-info/RECORD +704 -0
- udata-9.1.2.dev30355.dist-info/RECORD +0 -704
- {udata-9.1.2.dev30355.dist-info → udata-9.1.2.dev30382.dist-info}/LICENSE +0 -0
- {udata-9.1.2.dev30355.dist-info → udata-9.1.2.dev30382.dist-info}/WHEEL +0 -0
- {udata-9.1.2.dev30355.dist-info → udata-9.1.2.dev30382.dist-info}/entry_points.txt +0 -0
- {udata-9.1.2.dev30355.dist-info → udata-9.1.2.dev30382.dist-info}/top_level.txt +0 -0
udata/harvest/tests/test_dcat_backend.py

@@ -1,32 +1,31 @@
-from datetime import date
 import logging
 import os
 import re
-
-import
+import xml.etree.ElementTree as ET
+from datetime import date
 
 import boto3
+import pytest
 from flask import current_app
-import xml.etree.ElementTree as ET
 
 from udata.core.dataservices.models import Dataservice
+from udata.core.dataset.factories import LicenseFactory, ResourceSchemaMockData
+from udata.core.organization.factories import OrganizationFactory
 from udata.harvest.models import HarvestJob
 from udata.models import Dataset
-from udata.core.organization.factories import OrganizationFactory
-from udata.core.dataset.factories import LicenseFactory, ResourceSchemaMockData
 from udata.storage.s3 import get_from_json
 
-from .factories import HarvestSourceFactory
-from ..backends.dcat import URIS_TO_REPLACE, CswIso19139DcatBackend
 from .. import actions
+from ..backends.dcat import URIS_TO_REPLACE, CswIso19139DcatBackend
+from .factories import HarvestSourceFactory
 
 log = logging.getLogger(__name__)
 
 
-TEST_DOMAIN =
-DCAT_URL_PATTERN =
-DCAT_FILES_DIR = os.path.join(os.path.dirname(__file__),
-CSW_DCAT_FILES_DIR = os.path.join(os.path.dirname(__file__),
+TEST_DOMAIN = "data.test.org"  # Need to be used in fixture file
+DCAT_URL_PATTERN = "http://{domain}/{path}"
+DCAT_FILES_DIR = os.path.join(os.path.dirname(__file__), "dcat")
+CSW_DCAT_FILES_DIR = os.path.join(os.path.dirname(__file__), "csw_dcat")
 
 
 def mock_dcat(rmock, filename, path=None):
@@ -41,7 +40,7 @@ def mock_pagination(rmock, path, pattern):
     url = DCAT_URL_PATTERN.format(path=path, domain=TEST_DOMAIN)
 
     def callback(request, context):
-        page = request.qs.get(
+        page = request.qs.get("page", [1])[0]
         filename = pattern.format(page=page)
         context.status_code = 200
         with open(os.path.join(DCAT_FILES_DIR, filename)) as dcatfile:
@@ -56,7 +55,7 @@ def mock_csw_pagination(rmock, path, pattern):
 
     def callback(request, context):
         request_tree = ET.fromstring(request.body)
-        page = int(request_tree.get(
+        page = int(request_tree.get("startPosition"))
         with open(os.path.join(CSW_DCAT_FILES_DIR, pattern.format(page))) as cswdcatfile:
             return cswdcatfile.read()
 
@@ -64,17 +63,14 @@ def mock_csw_pagination(rmock, path, pattern):
     return url
 
 
-@pytest.mark.usefixtures(
-@pytest.mark.options(PLUGINS=[
+@pytest.mark.usefixtures("clean_db")
+@pytest.mark.options(PLUGINS=["dcat"])
 class DcatBackendTest:
-
     def test_simple_flat(self, rmock):
-        filename =
+        filename = "flat.jsonld"
         url = mock_dcat(rmock, filename)
         org = OrganizationFactory()
-        source = HarvestSourceFactory(backend=
-                                      url=url,
-                                      organization=org)
+        source = HarvestSourceFactory(backend="dcat", url=url, organization=org)
 
         actions.run(source.slug)
 
@@ -87,17 +83,17 @@ class DcatBackendTest:
 
         assert len(datasets) == 3
 
-        for i in
+        for i in "1 2 3".split():
             d = datasets[i]
-            assert d.title == f
-            assert d.description == f
+            assert d.title == f"Dataset {i}"
+            assert d.description == f"Dataset {i} description"
             assert d.harvest.remote_id == i
-            assert d.harvest.backend ==
+            assert d.harvest.backend == "DCAT"
             assert d.harvest.source_id == str(source.id)
             assert d.harvest.domain == source.domain
             assert d.harvest.dct_identifier == i
-            assert d.harvest.remote_url == f
-            assert d.harvest.uri == f
+            assert d.harvest.remote_url == f"http://data.test.org/datasets/{i}"
+            assert d.harvest.uri == f"http://data.test.org/datasets/{i}"
             assert d.harvest.created_at.date() == date(2016, 12, 14)
             assert d.harvest.modified_at.date() == date(2016, 12, 14)
             assert d.harvest.last_update.date() == date.today()
@@ -105,72 +101,72 @@ class DcatBackendTest:
         assert d.harvest.archived is None
 
         # First dataset
-        dataset = datasets[
-        assert dataset.tags == [
-                                'theme-1', 'theme-2']
+        dataset = datasets["1"]
+        assert dataset.tags == ["tag-1", "tag-2", "tag-3", "tag-4", "theme-1", "theme-2"]
         assert len(dataset.resources) == 2
 
         # Second dataset
-        dataset = datasets[
-        assert dataset.tags == [
+        dataset = datasets["2"]
+        assert dataset.tags == ["tag-1", "tag-2", "tag-3"]
         assert len(dataset.resources) == 2
 
         # Third dataset
-        dataset = datasets[
-        assert dataset.tags == [
+        dataset = datasets["3"]
+        assert dataset.tags == ["tag-1", "tag-2"]
         assert len(dataset.resources) == 1
 
     def test_flat_with_blank_nodes(self, rmock):
-        filename =
+        filename = "bnodes.jsonld"
         url = mock_dcat(rmock, filename)
         org = OrganizationFactory()
-        source = HarvestSourceFactory(backend=
-                                      url=url,
-                                      organization=org)
+        source = HarvestSourceFactory(backend="dcat", url=url, organization=org)
 
         actions.run(source.slug)
 
         datasets = {d.harvest.dct_identifier: d for d in Dataset.objects}
 
         assert len(datasets) == 3
-        assert len(datasets[
-        assert len(datasets[
-        assert len(datasets[
-
-        assert datasets[
-        assert datasets[
-        assert datasets[
-        assert datasets[
-
-    @pytest.mark.options(
+        assert len(datasets["1"].resources) == 2
+        assert len(datasets["2"].resources) == 2
+        assert len(datasets["3"].resources) == 1
+
+        assert datasets["1"].resources[0].title == "Resource 1-1"
+        assert datasets["1"].resources[0].description == "A JSON resource"
+        assert datasets["1"].resources[0].format == "json"
+        assert datasets["1"].resources[0].mime == "application/json"
+
+    @pytest.mark.options(
+        SCHEMA_CATALOG_URL="https://example.com/schemas",
+        HARVEST_MAX_CATALOG_SIZE_IN_MONGO=None,
+        HARVEST_GRAPHS_S3_BUCKET="test_bucket",
+        S3_URL="https://example.org",
+        S3_ACCESS_KEY_ID="myUser",
+        S3_SECRET_ACCESS_KEY="password",
+    )
     def test_flat_with_blank_nodes_xml(self, rmock):
-        rmock.get(
+        rmock.get("https://example.com/schemas", json=ResourceSchemaMockData.get_mock_data())
 
-        filename =
+        filename = "bnodes.xml"
         url = mock_dcat(rmock, filename)
         org = OrganizationFactory()
-        source = HarvestSourceFactory(backend=
-                                      url=url,
-                                      organization=org)
+        source = HarvestSourceFactory(backend="dcat", url=url, organization=org)
 
         actions.run(source.slug)
 
         datasets = {d.harvest.dct_identifier: d for d in Dataset.objects}
 
         assert len(datasets) == 3
-        assert len(datasets[
-        assert len(datasets[
-        assert len(datasets[
+        assert len(datasets["3"].resources) == 1
+        assert len(datasets["1"].resources) == 2
+        assert len(datasets["2"].resources) == 2
 
     def test_harvest_dataservices(self, rmock):
-        rmock.get(
+        rmock.get("https://example.com/schemas", json=ResourceSchemaMockData.get_mock_data())
 
-        filename =
+        filename = "bnodes.xml"
        url = mock_dcat(rmock, filename)
         org = OrganizationFactory()
-        source = HarvestSourceFactory(backend=
-                                      url=url,
-                                      organization=org)
+        source = HarvestSourceFactory(backend="dcat", url=url, organization=org)
 
         actions.run(source.slug)
 
@@ -179,30 +175,57 @@ class DcatBackendTest:
         assert len(dataservices) == 1
         assert dataservices[0].title == "Explore API v2"
         assert dataservices[0].base_api_url == "https://data.paris2024.org/api/explore/v2.1/"
-        assert
-
+        assert (
+            dataservices[0].endpoint_description_url
+            == "https://data.paris2024.org/api/explore/v2.1/swagger.json"
+        )
+        assert (
+            dataservices[0].harvest.remote_url
+            == "https://data.paris2024.org/api/explore/v2.1/console"
+        )
 
     def test_harvest_literal_spatial(self, rmock):
-        url = mock_dcat(rmock,
+        url = mock_dcat(rmock, "evian.json")
         org = OrganizationFactory()
-        source = HarvestSourceFactory(backend=
-
-                                      organization=org)
-
+        source = HarvestSourceFactory(backend="dcat", url=url, organization=org)
+
         actions.run(source.slug)
 
         datasets = {d.harvest.dct_identifier: d for d in Dataset.objects}
         assert len(datasets) == 8
-        assert
-
-
+        assert (
+            datasets[
+                "https://www.arcgis.com/home/item.html?id=f6565516d1354383b25793e630cf3f2b&sublayer=5"
+            ].spatial
+            is not None
+        )
+        assert datasets[
+            "https://www.arcgis.com/home/item.html?id=f6565516d1354383b25793e630cf3f2b&sublayer=5"
+        ].spatial.geom == {
+            "type": "MultiPolygon",
+            "coordinates": [
+                [
+                    [
+                        [6.5735, 46.3912],
+                        [6.6069, 46.3912],
+                        [6.6069, 46.4028],
+                        [6.5735, 46.4028],
+                        [6.5735, 46.3912],
+                    ]
+                ]
+            ],
+        }
 
-    @pytest.mark.skip(
-
+    @pytest.mark.skip(
+        reason="Mocking S3 requires `moto` which is not available for our current Python 3.7. We can manually test it."
+    )
+    @pytest.mark.options(
+        SCHEMA_CATALOG_URL="https://example.com/schemas", HARVEST_JOBS_RETENTION_DAYS=0
+    )
     # @mock_s3
     # @pytest.mark.options(HARVEST_MAX_CATALOG_SIZE_IN_MONGO=15, HARVEST_GRAPHS_S3_BUCKET="test_bucket", S3_URL="https://example.org", S3_ACCESS_KEY_ID="myUser", S3_SECRET_ACCESS_KEY="password")
     def test_harvest_big_catalog(self, rmock):
-        rmock.get(
+        rmock.get("https://example.com/schemas", json=ResourceSchemaMockData.get_mock_data())
 
         # We need to create the bucket since this is all in Moto's 'virtual' AWS account
         # conn = boto3.resource(
@@ -213,134 +236,153 @@ class DcatBackendTest:
         # )
         # conn.create_bucket(Bucket="test_bucket")
 
-        filename =
+        filename = "bnodes.xml"
         url = mock_dcat(rmock, filename)
         org = OrganizationFactory()
-        source = HarvestSourceFactory(backend=
-                                      url=url,
-                                      organization=org)
+        source = HarvestSourceFactory(backend="dcat", url=url, organization=org)
 
         actions.run(source.slug)
 
         datasets = {d.harvest.dct_identifier: d for d in Dataset.objects}
 
-        assert datasets[
-        resources_by_title = {
+        assert datasets["1"].schema == None
+        resources_by_title = {resource["title"]: resource for resource in datasets["1"].resources}
 
         # Schema with wrong version are considered as external. Maybe we could change this in the future
-        assert
-
-
+        assert (
+            resources_by_title["Resource 1-2"].schema.url
+            == "https://schema.data.gouv.fr/schemas/etalab/schema-irve-statique/1337.42.0/schema-statique.json"
+        )
+        assert resources_by_title["Resource 1-2"].schema.name == None
+        assert resources_by_title["Resource 1-2"].schema.version == None
 
-        assert datasets[
-        assert
-
+        assert datasets["2"].schema.name == "RGF93 / Lambert-93 (EPSG:2154)"
+        assert (
+            datasets["2"].schema.url
+            == "http://inspire.ec.europa.eu/glossary/SpatialReferenceSystem"
+        )
+        resources_by_title = {resource["title"]: resource for resource in datasets["2"].resources}
 
         # Unknown schema are kept as they were provided
-        assert resources_by_title[
-        assert resources_by_title[
-        assert resources_by_title[
+        assert resources_by_title["Resource 2-1"].schema.name == "Example Schema"
+        assert resources_by_title["Resource 2-1"].schema.url == "https://example.org/schema.json"
+        assert resources_by_title["Resource 2-1"].schema.version == None
 
-        assert resources_by_title[
+        assert resources_by_title["Resource 2-2"].schema == None
 
-        assert datasets[
-        resources_by_title = {
+        assert datasets["3"].schema == None
+        resources_by_title = {resource["title"]: resource for resource in datasets["3"].resources}
 
         # If there is just the URL, and it matches a known schema inside the catalog, only set the name and the version
         # (discard the URL)
-        assert resources_by_title[
-        assert resources_by_title[
-        assert resources_by_title[
+        assert resources_by_title["Resource 3-1"].schema.name == "etalab/schema-irve-statique"
+        assert resources_by_title["Resource 3-1"].schema.url == None
+        assert resources_by_title["Resource 3-1"].schema.version == "2.2.0"
 
-        job = HarvestJob.objects.order_by(
+        job = HarvestJob.objects.order_by("-id").first()
 
         assert job.source.slug == source.slug
-        assert
+        assert (
+            get_from_json(current_app.config.get("HARVEST_GRAPHS_S3_BUCKET"), job.data["filename"])
+            is not None
+        )
 
         # Retention is 0 days in config
         actions.purge_jobs()
-        assert
+        assert (
+            get_from_json(current_app.config.get("HARVEST_GRAPHS_S3_BUCKET"), job.data["filename"])
+            is None
+        )
 
-    @pytest.mark.options(SCHEMA_CATALOG_URL=
+    @pytest.mark.options(SCHEMA_CATALOG_URL="https://example.com/schemas", HARVEST_MAX_ITEMS=2)
     def test_harvest_max_items(self, rmock):
-        rmock.get(
+        rmock.get("https://example.com/schemas", json=ResourceSchemaMockData.get_mock_data())
 
-        filename =
+        filename = "bnodes.xml"
         url = mock_dcat(rmock, filename)
         org = OrganizationFactory()
-        source = HarvestSourceFactory(backend=
+        source = HarvestSourceFactory(backend="dcat", url=url, organization=org)
 
         actions.run(source.slug)
 
         assert Dataset.objects.count() == 2
-        assert HarvestJob.objects.first().status ==
+        assert HarvestJob.objects.first().status == "done"
 
-    @pytest.mark.options(SCHEMA_CATALOG_URL=
+    @pytest.mark.options(SCHEMA_CATALOG_URL="https://example.com/schemas")
     def test_harvest_spatial(self, rmock):
-        rmock.get(
+        rmock.get("https://example.com/schemas", json=ResourceSchemaMockData.get_mock_data())
 
-        filename =
+        filename = "bnodes.xml"
         url = mock_dcat(rmock, filename)
         org = OrganizationFactory()
-        source = HarvestSourceFactory(backend=
+        source = HarvestSourceFactory(backend="dcat", url=url, organization=org)
 
         actions.run(source.slug)
 
         datasets = {d.harvest.dct_identifier: d for d in Dataset.objects}
 
-        assert datasets[
-        assert datasets[
-
+        assert datasets["1"].spatial == None
+        assert datasets["2"].spatial.geom == {
+            "type": "MultiPolygon",
+            "coordinates": [
+                [[[-6, 51], [10, 51], [10, 40], [-6, 40], [-6, 51]]],
+                [[[4, 45], [4, 46], [4, 46], [4, 45], [4, 45]]],
+                [[[159, -25.0], [159, -11], [212, -11], [212, -25.0], [159, -25.0]]],
+            ],
+        }
+        assert datasets["3"].spatial == None
 
-    @pytest.mark.options(SCHEMA_CATALOG_URL=
+    @pytest.mark.options(SCHEMA_CATALOG_URL="https://example.com/schemas")
     def test_harvest_schemas(self, rmock):
-        rmock.get(
+        rmock.get("https://example.com/schemas", json=ResourceSchemaMockData.get_mock_data())
 
-        filename =
+        filename = "bnodes.xml"
         url = mock_dcat(rmock, filename)
         org = OrganizationFactory()
-        source = HarvestSourceFactory(backend=
-                                      url=url,
-                                      organization=org)
+        source = HarvestSourceFactory(backend="dcat", url=url, organization=org)
 
         actions.run(source.slug)
 
         datasets = {d.harvest.dct_identifier: d for d in Dataset.objects}
 
-        assert datasets[
-        resources_by_title = {
+        assert datasets["1"].schema == None
+        resources_by_title = {resource["title"]: resource for resource in datasets["1"].resources}
 
         # Schema with wrong version are considered as external. Maybe we could change this in the future
-        assert
-
-
+        assert (
+            resources_by_title["Resource 1-2"].schema.url
+            == "https://schema.data.gouv.fr/schemas/etalab/schema-irve-statique/1337.42.0/schema-statique.json"
+        )
+        assert resources_by_title["Resource 1-2"].schema.name == None
+        assert resources_by_title["Resource 1-2"].schema.version == None
 
-        assert datasets[
-        assert
-
+        assert datasets["2"].schema.name == "RGF93 / Lambert-93 (EPSG:2154)"
+        assert (
+            datasets["2"].schema.url
+            == "http://inspire.ec.europa.eu/glossary/SpatialReferenceSystem"
+        )
+        resources_by_title = {resource["title"]: resource for resource in datasets["2"].resources}
 
         # Unknown schema are kept as they were provided
-        assert resources_by_title[
-        assert resources_by_title[
-        assert resources_by_title[
+        assert resources_by_title["Resource 2-1"].schema.name == "Example Schema"
+        assert resources_by_title["Resource 2-1"].schema.url == "https://example.org/schema.json"
+        assert resources_by_title["Resource 2-1"].schema.version == None
 
-        assert resources_by_title[
+        assert resources_by_title["Resource 2-2"].schema == None
 
-        assert datasets[
-        resources_by_title = {
+        assert datasets["3"].schema == None
+        resources_by_title = {resource["title"]: resource for resource in datasets["3"].resources}
 
         # If there is just the URL, and it matches a known schema inside the catalog, only set the name and the version
         # (discard the URL)
-        assert resources_by_title[
-        assert resources_by_title[
-        assert resources_by_title[
+        assert resources_by_title["Resource 3-1"].schema.name == "etalab/schema-irve-statique"
+        assert resources_by_title["Resource 3-1"].schema.url == None
+        assert resources_by_title["Resource 3-1"].schema.version == "2.2.0"
 
     def test_simple_nested_attributes(self, rmock):
-        filename =
+        filename = "nested.jsonld"
         url = mock_dcat(rmock, filename)
-        source = HarvestSourceFactory(backend=
-                                      url=url,
-                                      organization=OrganizationFactory())
+        source = HarvestSourceFactory(backend="dcat", url=url, organization=OrganizationFactory())
 
         actions.run(source.slug)
 
@@ -353,23 +395,20 @@ class DcatBackendTest:
         assert dataset.temporal_coverage is not None
         assert dataset.temporal_coverage.start == date(2016, 1, 1)
         assert dataset.temporal_coverage.end == date(2016, 12, 5)
-        assert dataset.harvest.remote_url ==
+        assert dataset.harvest.remote_url == "http://data.test.org/datasets/1"
 
         assert len(dataset.resources) == 1
 
         resource = dataset.resources[0]
         assert resource.checksum is not None
-        assert resource.checksum.type ==
-        assert
-                == 'fb4106aa286a53be44ec99515f0f0421d4d7ad7d')
+        assert resource.checksum.type == "sha1"
+        assert resource.checksum.value == "fb4106aa286a53be44ec99515f0f0421d4d7ad7d"
 
     def test_idempotence(self, rmock):
-        filename =
+        filename = "flat.jsonld"
         url = mock_dcat(rmock, filename)
         org = OrganizationFactory()
-        source = HarvestSourceFactory(backend=
-                                      url=url,
-                                      organization=org)
+        source = HarvestSourceFactory(backend="dcat", url=url, organization=org)
 
         # Run the same havester twice
         actions.run(source.slug)
@@ -378,17 +417,14 @@ class DcatBackendTest:
         datasets = {d.harvest.dct_identifier: d for d in Dataset.objects}
 
         assert len(datasets) == 3
-        assert len(datasets[
-        assert len(datasets[
-        assert len(datasets[
+        assert len(datasets["1"].resources) == 2
+        assert len(datasets["2"].resources) == 2
+        assert len(datasets["3"].resources) == 1
 
     def test_hydra_partial_collection_view_pagination(self, rmock):
-        url = mock_pagination(rmock,
-                              'partial-collection-{page}.jsonld')
+        url = mock_pagination(rmock, "catalog.jsonld", "partial-collection-{page}.jsonld")
         org = OrganizationFactory()
-        source = HarvestSourceFactory(backend=
-                                      url=url,
-                                      organization=org)
+        source = HarvestSourceFactory(backend="dcat", url=url, organization=org)
 
         actions.run(source.slug)
 
@@ -398,12 +434,9 @@ class DcatBackendTest:
         assert len(job.items) == 4
 
     def test_hydra_legacy_paged_collection_pagination(self, rmock):
-        url = mock_pagination(rmock,
-                              'paged-collection-{page}.jsonld')
+        url = mock_pagination(rmock, "catalog.jsonld", "paged-collection-{page}.jsonld")
         org = OrganizationFactory()
-        source = HarvestSourceFactory(backend=
-                                      url=url,
-                                      organization=org)
+        source = HarvestSourceFactory(backend="dcat", url=url, organization=org)
 
         actions.run(source.slug)
 
@@ -413,12 +446,10 @@ class DcatBackendTest:
         assert len(job.items) == 4
 
     def test_failure_on_initialize(self, rmock):
-        url = DCAT_URL_PATTERN.format(path=
-        rmock.get(url, text=
+        url = DCAT_URL_PATTERN.format(path="", domain=TEST_DOMAIN)
+        rmock.get(url, text="should fail")
         org = OrganizationFactory()
-        source = HarvestSourceFactory(backend=
-                                      url=url,
-                                      organization=org)
+        source = HarvestSourceFactory(backend="dcat", url=url, organization=org)
 
         actions.run(source.slug)
 
@@ -426,15 +457,13 @@ class DcatBackendTest:
 
         job = source.get_last_job()
 
-        assert job.status ==
+        assert job.status == "failed"
 
     def test_supported_mime_type(self, rmock):
-        url = mock_dcat(rmock,
-        rmock.head(url, headers={
+        url = mock_dcat(rmock, "catalog.xml", path="without/extension")
+        rmock.head(url, headers={"Content-Type": "application/xml; charset=utf-8"})
         org = OrganizationFactory()
-        source = HarvestSourceFactory(backend=
-                                      url=url,
-                                      organization=org)
+        source = HarvestSourceFactory(backend="dcat", url=url, organization=org)
 
         actions.run(source.slug)
 
@@ -442,144 +471,154 @@ class DcatBackendTest:
 
         job = source.get_last_job()
 
-        assert job.status ==
+        assert job.status == "done"
         assert job.errors == []
         assert len(job.items) == 3
 
     def test_xml_catalog(self, rmock):
-        LicenseFactory(id=
+        LicenseFactory(id="lov2", title="Licence Ouverte Version 2.0")
 
-        url = mock_dcat(rmock,
+        url = mock_dcat(rmock, "catalog.xml", path="catalog.xml")
         org = OrganizationFactory()
-        source = HarvestSourceFactory(backend=
-                                      url=url,
-                                      organization=org)
+        source = HarvestSourceFactory(backend="dcat", url=url, organization=org)
 
         actions.run(source.slug)
 
         # test dct:license support
-        dataset = Dataset.objects.get(harvest__dct_identifier=
-        assert dataset.license.id ==
-        assert dataset.harvest.remote_url ==
-        assert dataset.harvest.remote_id ==
+        dataset = Dataset.objects.get(harvest__dct_identifier="3")
+        assert dataset.license.id == "lov2"
+        assert dataset.harvest.remote_url == "http://data.test.org/datasets/3"
+        assert dataset.harvest.remote_id == "3"
         assert dataset.harvest.created_at.date() == date(2016, 12, 14)
         assert dataset.harvest.modified_at.date() == date(2016, 12, 14)
-        assert dataset.frequency ==
-        assert dataset.description ==
+        assert dataset.frequency == "daily"
+        assert dataset.description == "Dataset 3 description"
 
         assert dataset.temporal_coverage is not None
         assert dataset.temporal_coverage.start == date(2016, 1, 1)
         assert dataset.temporal_coverage.end == date(2016, 12, 5)
 
-        assert
-
+        assert (
+            dataset.extras["harvest"]["dct:accessRights"]
+            == "http://inspire.ec.europa.eu/metadata-codelist/LimitationsOnPublicAccess/INSPIRE_Directive_Article13_1e"
+        )
+        assert dataset.extras["harvest"]["dct:provenance"] == [
+            "Description de la provenance des données"
+        ]
 
-        assert
-        assert
+        assert "observation-de-la-terre-et-environnement" in dataset.tags
+        assert "hvd" in dataset.tags
 
-        dataset = Dataset.objects.get(harvest__dct_identifier=
+        dataset = Dataset.objects.get(harvest__dct_identifier="1")
         # test html abstract description support
-        assert dataset.description ==
+        assert dataset.description == "# h1 title\n\n## h2 title\n\n **and bold text**"
         # test DCAT periodoftime
         assert dataset.temporal_coverage is not None
         assert dataset.temporal_coverage.start == date(2016, 1, 1)
         assert dataset.temporal_coverage.end == date(2016, 12, 5)
-        assert dataset.contact_point[
-        assert dataset.contact_point[
+        assert dataset.contact_point["email"] == "hello@its.me"
+        assert dataset.contact_point["name"] == "Organization contact"
         assert dataset.frequency is None
 
         assert len(dataset.resources) == 3
 
-        resource_1 = next(res for res in dataset.resources if res.title ==
-        assert resource_1.filetype ==
+        resource_1 = next(res for res in dataset.resources if res.title == "Resource 1-1")
+        assert resource_1.filetype == "remote"
         # Format is a IANA URI
-        assert resource_1.format ==
-        assert resource_1.mime ==
+        assert resource_1.format == "json"
+        assert resource_1.mime == "application/json"
         assert resource_1.filesize == 12323
-        assert resource_1.description ==
-        assert resource_1.url ==
-        assert resource_1.type ==
+        assert resource_1.description == "A JSON resource"
+        assert resource_1.url == "http://data.test.org/datasets/1/resources/1/file.json"
+        assert resource_1.type == "main"
 
-        resource_2 = next(res for res in dataset.resources if res.title ==
-        assert resource_2.format ==
-        assert resource_2.description ==
-        assert resource_2.url ==
-        assert resource_2.type ==
+        resource_2 = next(res for res in dataset.resources if res.title == "Resource 1-2")
+        assert resource_2.format == "json"
+        assert resource_2.description == "A JSON resource"
+        assert resource_2.url == "http://data.test.org/datasets/1/resources/2/file.json"
+        assert resource_2.type == "main"
 
         # Make sure additionnal resource is correctly harvested
-        resource_3 = next(res for res in dataset.resources if res.title ==
-        assert resource_3.format ==
-        assert resource_3.description ==
-        assert resource_3.url ==
-        assert resource_3.type ==
+        resource_3 = next(res for res in dataset.resources if res.title == "Resource 1-3")
+        assert resource_3.format == "json"
+        assert resource_3.description == ""
+        assert resource_3.url == "http://data.test.org/datasets/1/resources/3"
+        assert resource_3.type == "other"
 
     def test_geonetwork_xml_catalog(self, rmock):
-        url = mock_dcat(rmock,
+        url = mock_dcat(rmock, "geonetwork.xml", path="catalog.xml")
         org = OrganizationFactory()
-        source = HarvestSourceFactory(backend=
-                                      url=url,
-                                      organization=org)
+        source = HarvestSourceFactory(backend="dcat", url=url, organization=org)
         actions.run(source.slug)
         dataset = Dataset.objects.filter(organization=org).first()
         assert dataset is not None
         assert dataset.harvest is not None
-        assert
-
+        assert (
+            dataset.harvest.remote_id
+            == "0c456d2d-9548-4a2a-94ef-231d9d890ce2 https://sig.oreme.org/geonetwork/srv/resources0c456d2d-9548-4a2a-94ef-231d9d890ce2"
+        )  # noqa
+        assert (
+            dataset.harvest.dct_identifier
+            == "0c456d2d-9548-4a2a-94ef-231d9d890ce2 https://sig.oreme.org/geonetwork/srv/resources0c456d2d-9548-4a2a-94ef-231d9d890ce2"
+        )  # noqa
         assert dataset.harvest.created_at.date() == date(2004, 11, 3)
         assert dataset.harvest.modified_at is None
-        assert
+        assert (
+            dataset.harvest.uri
+            == "https://sig.oreme.org/geonetwork/srv/resources/datasets/0c456d2d-9548-4a2a-94ef-231d9d890ce2 https://sig.oreme.org/geonetwork/srv/resources0c456d2d-9548-4a2a-94ef-231d9d890ce2"
+        )  # noqa
         assert dataset.harvest.remote_url is None  # the uri validation failed
-        assert dataset.description.startswith(
+        assert dataset.description.startswith("Data of type chemistry")
         assert dataset.temporal_coverage is not None
         assert dataset.temporal_coverage.start == date(2004, 11, 3)
         assert dataset.temporal_coverage.end == date(2005, 3, 30)
 
     def test_sigoreme_xml_catalog(self, rmock):
-        LicenseFactory(id=
-        url = mock_dcat(rmock,
+        LicenseFactory(id="fr-lo", title="Licence ouverte / Open Licence")
+        url = mock_dcat(rmock, "sig.oreme.rdf")
        org = OrganizationFactory()
-        source = HarvestSourceFactory(backend=
-                                      url=url,
-                                      organization=org)
+        source = HarvestSourceFactory(backend="dcat", url=url, organization=org)
         actions.run(source.slug)
         dataset = Dataset.objects.filter(organization=org).first()
 
         assert dataset is not None
-        assert dataset.frequency ==
-        assert
-        assert
-        assert dataset.license.id ==
+        assert dataset.frequency == "irregular"
+        assert "gravi" in dataset.tags  # support dcat:keyword
+        assert "geodesy" in dataset.tags  # support dcat:theme
+        assert dataset.license.id == "fr-lo"
         assert len(dataset.resources) == 1
         assert dataset.description.startswith("Data from the 'National network")
         assert dataset.harvest is not None
-        assert dataset.harvest.dct_identifier ==
-        assert dataset.harvest.remote_id ==
+        assert dataset.harvest.dct_identifier == "0437a976-cff1-4fa6-807a-c23006df2f8f"
+        assert dataset.harvest.remote_id == "0437a976-cff1-4fa6-807a-c23006df2f8f"
         assert dataset.harvest.created_at is None
         assert dataset.harvest.modified_at is None
-        assert
-
+        assert (
+            dataset.harvest.uri
+            == "https://sig.oreme.org/geonetwork/srv/eng/catalog.search#/metadata//datasets/0437a976-cff1-4fa6-807a-c23006df2f8f"
+        )  # noqa
+        assert (
+            dataset.harvest.remote_url
+            == "https://sig.oreme.org/geonetwork/srv/eng/catalog.search#/metadata//datasets/0437a976-cff1-4fa6-807a-c23006df2f8f"
+        )  # noqa
         assert dataset.harvest.last_update.date() == date.today()
 
     def test_user_agent_get(self, rmock):
-        url = mock_dcat(rmock,
-        rmock.head(url, headers={
+        url = mock_dcat(rmock, "catalog.xml", path="without/extension")
+        rmock.head(url, headers={"Content-Type": "application/xml; charset=utf-8"})
         get_mock = rmock.get(url)
         org = OrganizationFactory()
-        source = HarvestSourceFactory(backend=
-                                      url=url,
-                                      organization=org)
+        source = HarvestSourceFactory(backend="dcat", url=url, organization=org)
         actions.run(source.slug)
 
-        assert
-        assert get_mock.last_request.headers[
+        assert "User-Agent" in get_mock.last_request.headers
+        assert get_mock.last_request.headers["User-Agent"] == "uData/0.1 dcat"
 
     def test_unsupported_mime_type(self, rmock):
-        url = DCAT_URL_PATTERN.format(path=
-        rmock.head(url, headers={
+        url = DCAT_URL_PATTERN.format(path="", domain=TEST_DOMAIN)
+        rmock.head(url, headers={"Content-Type": "text/html; charset=utf-8"})
         org = OrganizationFactory()
-        source = HarvestSourceFactory(backend=
-                                      url=url,
-                                      organization=org)
+        source = HarvestSourceFactory(backend="dcat", url=url, organization=org)
 
         actions.run(source.slug)
 
@@ -587,19 +626,17 @@ class DcatBackendTest:
 
         job = source.get_last_job()
 
-        assert job.status ==
+        assert job.status == "failed"
         assert len(job.errors) == 1
 
         error = job.errors[0]
         assert error.message == 'Unsupported mime type "text/html"'
 
     def test_unable_to_detect_format(self, rmock):
-        url = DCAT_URL_PATTERN.format(path=
-        rmock.head(url, headers={
+        url = DCAT_URL_PATTERN.format(path="", domain=TEST_DOMAIN)
+        rmock.head(url, headers={"Content-Type": ""})
         org = OrganizationFactory()
-        source = HarvestSourceFactory(backend=
-                                      url=url,
-                                      organization=org)
+        source = HarvestSourceFactory(backend="dcat", url=url, organization=org)
 
         actions.run(source.slug)
 
@@ -607,43 +644,46 @@ class DcatBackendTest:
 
         job = source.get_last_job()
 
-        assert job.status ==
+        assert job.status == "failed"
         assert len(job.errors) == 1
 
         error = job.errors[0]
-        expected =
+        expected = "Unable to detect format from extension or mime type"
         assert error.message == expected
 
     def test_use_replaced_uris(self, rmock, mocker):
         mocker.patch.dict(
             URIS_TO_REPLACE,
-            {
+            {
+                "http://example.org/this-url-does-not-exist": "https://json-ld.org/contexts/person.jsonld"
+            },
         )
-        url = DCAT_URL_PATTERN.format(path=
-        rmock.get(
-
-
-
-
-
+        url = DCAT_URL_PATTERN.format(path="", domain=TEST_DOMAIN)
+        rmock.get(
+            url,
+            json={
+                "@context": "http://example.org/this-url-does-not-exist",
+                "@type": "dcat:Catalog",
+                "dataset": [],
+            },
+        )
+        rmock.head(url, headers={"Content-Type": "application/json"})
         org = OrganizationFactory()
-        source = HarvestSourceFactory(backend=
-                                      url=url,
-                                      organization=org)
+        source = HarvestSourceFactory(backend="dcat", url=url, organization=org)
         actions.run(source.slug)
 
         source.reload()
 
         job = source.get_last_job()
         assert len(job.items) == 0
-        assert job.status ==
+        assert job.status == "done"
 
     def test_target_404(self, rmock):
-        filename =
+        filename = "obvious-format.jsonld"
         url = DCAT_URL_PATTERN.format(path=filename, domain=TEST_DOMAIN)
         rmock.get(url, status_code=404)
 
-        source = HarvestSourceFactory(backend=
+        source = HarvestSourceFactory(backend="dcat", url=url, organization=OrganizationFactory())
         actions.run(source.slug)
         source.reload()
 
@@ -652,11 +692,11 @@ class DcatBackendTest:
         assert len(job.errors) == 1
         assert "404 Client Error" in job.errors[0].message

-        filename =
+        filename = "need-to-head-to-guess-format"
         url = DCAT_URL_PATTERN.format(path=filename, domain=TEST_DOMAIN)
         rmock.head(url, status_code=404)

-        source = HarvestSourceFactory(backend=
+        source = HarvestSourceFactory(backend="dcat", url=url, organization=OrganizationFactory())
         actions.run(source.slug)
         source.reload()

@@ -666,16 +706,13 @@ class DcatBackendTest:
         assert "404 Client Error" in job.errors[0].message


-@pytest.mark.usefixtures(
-@pytest.mark.options(PLUGINS=[
+@pytest.mark.usefixtures("clean_db")
+@pytest.mark.options(PLUGINS=["csw"])
 class CswDcatBackendTest:
-
     def test_geonetworkv4(self, rmock):
-        url = mock_csw_pagination(rmock,
+        url = mock_csw_pagination(rmock, "geonetwork/srv/eng/csw.rdf", "geonetworkv4-page-{}.xml")
         org = OrganizationFactory()
-        source = HarvestSourceFactory(backend=
-                                      url=url,
-                                      organization=org)
+        source = HarvestSourceFactory(backend="csw-dcat", url=url, organization=org)

         actions.run(source.slug)

@@ -689,48 +726,55 @@ class CswDcatBackendTest:
         assert len(datasets) == 6

         # First dataset
-        dataset = datasets[
-        assert dataset.title ==
-        assert
-
-
-
-
+        dataset = datasets["https://www.geo2france.fr/2017/accidento"]
+        assert dataset.title == "Localisation des accidents de la circulation routière en 2017"
+        assert (
+            dataset.description == "Accidents corporels de la circulation en Hauts de France (2017)"
+        )
+        assert set(dataset.tags) == set(
+            [
+                "donnee-ouverte",
+                "accidentologie",
+                "accident",
+                "reseaux-de-transport",
+                "accident-de-la-route",
+                "hauts-de-france",
+                "nord",
+                "pas-de-calais",
+                "oise",
+                "somme",
+                "aisne",
+            ]
+        )
         assert dataset.harvest.created_at.date() == date(2017, 1, 1)
         assert len(dataset.resources) == 1
         resource = dataset.resources[0]
-        assert resource.title ==
-        assert resource.url ==
-        assert resource.format ==
+        assert resource.title == "accidento_hdf_L93"
+        assert resource.url == "https://www.geo2france.fr/geoserver/cr_hdf/ows"
+        assert resource.format == "ogc:wms"

     def test_user_agent_post(self, rmock):
-        url = mock_csw_pagination(rmock,
+        url = mock_csw_pagination(rmock, "geonetwork/srv/eng/csw.rdf", "geonetworkv4-page-{}.xml")
         get_mock = rmock.post(url)
         org = OrganizationFactory()
-        source = HarvestSourceFactory(backend=
-                                      url=url,
-                                      organization=org)
+        source = HarvestSourceFactory(backend="csw-dcat", url=url, organization=org)

         actions.run(source.slug)

-        assert
-        assert get_mock.last_request.headers[
+        assert "User-Agent" in get_mock.last_request.headers
+        assert get_mock.last_request.headers["User-Agent"] == "uData/0.1 csw-dcat"


-@pytest.mark.usefixtures(
-@pytest.mark.options(PLUGINS=[
+@pytest.mark.usefixtures("clean_db")
+@pytest.mark.options(PLUGINS=["csw"])
 class CswIso19139DcatBackendTest:
-
     def test_geo2france(self, rmock):
-
         with open(os.path.join(CSW_DCAT_FILES_DIR, "XSLT.xml"), "r") as f:
             xslt = f.read()
-        url = mock_csw_pagination(rmock,
+        url = mock_csw_pagination(rmock, "geonetwork/srv/eng/csw.rdf", "geonetwork-iso-page-{}.xml")
         rmock.get(CswIso19139DcatBackend.XSL_URL, text=xslt)
         org = OrganizationFactory()
-        source = HarvestSourceFactory(backend=
-                                      url=url,
-                                      organization=org)
+        source = HarvestSourceFactory(backend="csw-iso-19139", url=url, organization=org)

         actions.run(source.slug)

@@ -745,19 +789,44 @@ class CswIso19139DcatBackendTest:

         # First dataset
         # dataset identifier is gmd:RS_Identifier > gmd:codeSpace + gmd:code
-        dataset = datasets[
+        dataset = datasets[
+            "http://catalogue.geo-ide.developpement-durable.gouv.fr/fr-120066022-orphan-residentifier-140d31c6-643d-42a9-85df-2737a118e144"
+        ]
         assert dataset.title == "Plan local d'urbanisme de la commune de Cartigny"
-        assert
-
-            '
-
-
+        assert (
+            dataset.description
+            == "Le présent standard de données COVADIS concerne les documents de plans locaux d'urbanisme (PLU) et les plans d'occupation des sols (POS qui valent PLU)."
+        )
+        assert set(dataset.tags) == set(
+            [
+                "amenagement-urbanisme-zonages-planification",
+                "cartigny",
+                "document-durbanisme",
+                "donnees-ouvertes",
+                "plu",
+                "usage-des-sols",
+            ]
+        )
         assert dataset.harvest.created_at.date() == date(2017, 10, 7)
-        assert dataset.spatial.geom == {
-
+        assert dataset.spatial.geom == {
+            "type": "MultiPolygon",
+            "coordinates": [
+                [
+                    [
+                        [3.28133559, 50.48188019],
+                        [1.31279111, 50.48188019],
+                        [1.31279111, 49.38547516],
+                        [3.28133559, 49.38547516],
+                        [3.28133559, 50.48188019],
+                    ]
+                ]
+            ],
         }
-        assert
-
+        assert (
+            dataset.contact_point.name
+            == "DDTM 80 (Direction Départementale des Territoires et de la Mer de la Somme)"
+        )
+        assert dataset.contact_point.email == "ddtm-sap-bsig@somme.gouv.fr"

         # License is not properly mapped in XSLT conversion
         assert dataset.license is None
@@ -767,8 +836,11 @@ class CswIso19139DcatBackendTest:
         # (See mapping at: https://semiceu.github.io/GeoDCAT-AP/releases/2.0.0/#resource-locator---on-line-resource)
         assert len(dataset.resources) == 1
         resource = dataset.resources[0]
-        assert resource.title ==
-        assert
-
+        assert resource.title == "Téléchargement direct du lot et des documents associés"
+        assert (
+            resource.url
+            == "http://atom.geo-ide.developpement-durable.gouv.fr/atomArchive/GetResource?id=fr-120066022-ldd-cab63273-b3ae-4e8a-ae1c-6192e45faa94&datasetAggregate=true"
+        )
+
         # Sadly resource format is parsed as a blank node. Format parsing should be improved.
-        assert re.match(r
+        assert re.match(r"n[0-9a-f]{32}", resource.format)