udata 9.1.2.dev30355__py2.py3-none-any.whl → 9.1.2.dev30382__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of udata might be problematic. Click here for more details.
- tasks/__init__.py +109 -107
- tasks/helpers.py +18 -18
- udata/__init__.py +4 -4
- udata/admin/views.py +5 -5
- udata/api/__init__.py +135 -124
- udata/api/commands.py +45 -37
- udata/api/errors.py +5 -4
- udata/api/fields.py +23 -21
- udata/api/oauth2.py +55 -74
- udata/api/parsers.py +15 -15
- udata/api/signals.py +1 -1
- udata/api_fields.py +137 -89
- udata/app.py +56 -54
- udata/assets.py +5 -5
- udata/auth/__init__.py +37 -26
- udata/auth/forms.py +23 -15
- udata/auth/helpers.py +1 -1
- udata/auth/mails.py +3 -3
- udata/auth/password_validation.py +19 -15
- udata/auth/views.py +94 -68
- udata/commands/__init__.py +71 -69
- udata/commands/cache.py +7 -7
- udata/commands/db.py +201 -140
- udata/commands/dcat.py +36 -30
- udata/commands/fixtures.py +100 -84
- udata/commands/images.py +21 -20
- udata/commands/info.py +17 -20
- udata/commands/init.py +10 -10
- udata/commands/purge.py +12 -13
- udata/commands/serve.py +41 -29
- udata/commands/static.py +16 -18
- udata/commands/test.py +20 -20
- udata/commands/tests/fixtures.py +26 -24
- udata/commands/worker.py +31 -33
- udata/core/__init__.py +12 -12
- udata/core/activity/__init__.py +0 -1
- udata/core/activity/api.py +59 -49
- udata/core/activity/models.py +28 -26
- udata/core/activity/signals.py +1 -1
- udata/core/activity/tasks.py +16 -10
- udata/core/badges/api.py +6 -6
- udata/core/badges/commands.py +14 -13
- udata/core/badges/fields.py +8 -5
- udata/core/badges/forms.py +7 -4
- udata/core/badges/models.py +16 -31
- udata/core/badges/permissions.py +1 -3
- udata/core/badges/signals.py +2 -2
- udata/core/badges/tasks.py +3 -2
- udata/core/badges/tests/test_commands.py +10 -10
- udata/core/badges/tests/test_model.py +24 -31
- udata/core/contact_point/api.py +19 -18
- udata/core/contact_point/api_fields.py +21 -14
- udata/core/contact_point/factories.py +2 -2
- udata/core/contact_point/forms.py +7 -6
- udata/core/contact_point/models.py +3 -5
- udata/core/dataservices/api.py +26 -21
- udata/core/dataservices/factories.py +13 -11
- udata/core/dataservices/models.py +35 -40
- udata/core/dataservices/permissions.py +4 -4
- udata/core/dataservices/rdf.py +40 -17
- udata/core/dataservices/tasks.py +4 -3
- udata/core/dataset/actions.py +10 -10
- udata/core/dataset/activities.py +21 -23
- udata/core/dataset/api.py +321 -298
- udata/core/dataset/api_fields.py +443 -271
- udata/core/dataset/apiv2.py +305 -229
- udata/core/dataset/commands.py +38 -36
- udata/core/dataset/constants.py +61 -54
- udata/core/dataset/csv.py +70 -74
- udata/core/dataset/events.py +39 -32
- udata/core/dataset/exceptions.py +8 -4
- udata/core/dataset/factories.py +57 -65
- udata/core/dataset/forms.py +87 -63
- udata/core/dataset/models.py +336 -280
- udata/core/dataset/permissions.py +9 -6
- udata/core/dataset/preview.py +15 -17
- udata/core/dataset/rdf.py +156 -122
- udata/core/dataset/search.py +92 -77
- udata/core/dataset/signals.py +1 -1
- udata/core/dataset/tasks.py +63 -54
- udata/core/discussions/actions.py +5 -5
- udata/core/discussions/api.py +124 -120
- udata/core/discussions/factories.py +2 -2
- udata/core/discussions/forms.py +9 -7
- udata/core/discussions/metrics.py +1 -3
- udata/core/discussions/models.py +25 -24
- udata/core/discussions/notifications.py +18 -14
- udata/core/discussions/permissions.py +3 -3
- udata/core/discussions/signals.py +4 -4
- udata/core/discussions/tasks.py +24 -28
- udata/core/followers/api.py +32 -33
- udata/core/followers/models.py +9 -9
- udata/core/followers/signals.py +3 -3
- udata/core/jobs/actions.py +7 -7
- udata/core/jobs/api.py +99 -92
- udata/core/jobs/commands.py +48 -49
- udata/core/jobs/forms.py +11 -11
- udata/core/jobs/models.py +6 -6
- udata/core/metrics/__init__.py +2 -2
- udata/core/metrics/commands.py +34 -30
- udata/core/metrics/models.py +2 -4
- udata/core/metrics/signals.py +1 -1
- udata/core/metrics/tasks.py +3 -3
- udata/core/organization/activities.py +12 -15
- udata/core/organization/api.py +167 -174
- udata/core/organization/api_fields.py +183 -124
- udata/core/organization/apiv2.py +32 -32
- udata/core/organization/commands.py +20 -22
- udata/core/organization/constants.py +11 -11
- udata/core/organization/csv.py +17 -15
- udata/core/organization/factories.py +8 -11
- udata/core/organization/forms.py +32 -26
- udata/core/organization/metrics.py +2 -1
- udata/core/organization/models.py +87 -67
- udata/core/organization/notifications.py +18 -14
- udata/core/organization/permissions.py +10 -11
- udata/core/organization/rdf.py +14 -14
- udata/core/organization/search.py +30 -28
- udata/core/organization/signals.py +7 -7
- udata/core/organization/tasks.py +42 -61
- udata/core/owned.py +38 -27
- udata/core/post/api.py +82 -81
- udata/core/post/constants.py +8 -5
- udata/core/post/factories.py +4 -4
- udata/core/post/forms.py +13 -14
- udata/core/post/models.py +20 -22
- udata/core/post/tests/test_api.py +30 -32
- udata/core/reports/api.py +8 -7
- udata/core/reports/constants.py +1 -3
- udata/core/reports/models.py +10 -10
- udata/core/reuse/activities.py +15 -19
- udata/core/reuse/api.py +123 -126
- udata/core/reuse/api_fields.py +120 -85
- udata/core/reuse/apiv2.py +11 -10
- udata/core/reuse/constants.py +23 -23
- udata/core/reuse/csv.py +18 -18
- udata/core/reuse/factories.py +5 -9
- udata/core/reuse/forms.py +24 -21
- udata/core/reuse/models.py +55 -51
- udata/core/reuse/permissions.py +2 -2
- udata/core/reuse/search.py +49 -46
- udata/core/reuse/signals.py +1 -1
- udata/core/reuse/tasks.py +4 -5
- udata/core/site/api.py +47 -50
- udata/core/site/factories.py +2 -2
- udata/core/site/forms.py +4 -5
- udata/core/site/models.py +94 -63
- udata/core/site/rdf.py +14 -14
- udata/core/spam/api.py +16 -9
- udata/core/spam/constants.py +4 -4
- udata/core/spam/fields.py +13 -7
- udata/core/spam/models.py +27 -20
- udata/core/spam/signals.py +1 -1
- udata/core/spam/tests/test_spam.py +6 -5
- udata/core/spatial/api.py +72 -80
- udata/core/spatial/api_fields.py +73 -58
- udata/core/spatial/commands.py +67 -64
- udata/core/spatial/constants.py +3 -3
- udata/core/spatial/factories.py +37 -54
- udata/core/spatial/forms.py +27 -26
- udata/core/spatial/geoids.py +17 -17
- udata/core/spatial/models.py +43 -47
- udata/core/spatial/tasks.py +2 -1
- udata/core/spatial/tests/test_api.py +115 -130
- udata/core/spatial/tests/test_fields.py +74 -77
- udata/core/spatial/tests/test_geoid.py +22 -22
- udata/core/spatial/tests/test_models.py +5 -7
- udata/core/spatial/translations.py +16 -16
- udata/core/storages/__init__.py +16 -18
- udata/core/storages/api.py +66 -64
- udata/core/storages/tasks.py +7 -7
- udata/core/storages/utils.py +15 -15
- udata/core/storages/views.py +5 -6
- udata/core/tags/api.py +17 -14
- udata/core/tags/csv.py +4 -4
- udata/core/tags/models.py +8 -5
- udata/core/tags/tasks.py +11 -13
- udata/core/tags/views.py +4 -4
- udata/core/topic/api.py +84 -73
- udata/core/topic/apiv2.py +157 -127
- udata/core/topic/factories.py +3 -4
- udata/core/topic/forms.py +12 -14
- udata/core/topic/models.py +14 -19
- udata/core/topic/parsers.py +26 -26
- udata/core/user/activities.py +30 -29
- udata/core/user/api.py +151 -152
- udata/core/user/api_fields.py +132 -100
- udata/core/user/apiv2.py +7 -7
- udata/core/user/commands.py +38 -38
- udata/core/user/factories.py +8 -9
- udata/core/user/forms.py +14 -11
- udata/core/user/metrics.py +2 -2
- udata/core/user/models.py +68 -69
- udata/core/user/permissions.py +4 -5
- udata/core/user/rdf.py +7 -8
- udata/core/user/tasks.py +2 -2
- udata/core/user/tests/test_user_model.py +24 -16
- udata/db/tasks.py +2 -1
- udata/entrypoints.py +35 -31
- udata/errors.py +2 -1
- udata/event/values.py +6 -6
- udata/factories.py +2 -2
- udata/features/identicon/api.py +5 -6
- udata/features/identicon/backends.py +48 -55
- udata/features/identicon/tests/test_backends.py +4 -5
- udata/features/notifications/__init__.py +0 -1
- udata/features/notifications/actions.py +9 -9
- udata/features/notifications/api.py +17 -13
- udata/features/territories/__init__.py +12 -10
- udata/features/territories/api.py +14 -15
- udata/features/territories/models.py +23 -28
- udata/features/transfer/actions.py +8 -11
- udata/features/transfer/api.py +84 -77
- udata/features/transfer/factories.py +2 -1
- udata/features/transfer/models.py +11 -12
- udata/features/transfer/notifications.py +19 -15
- udata/features/transfer/permissions.py +5 -5
- udata/forms/__init__.py +5 -2
- udata/forms/fields.py +164 -172
- udata/forms/validators.py +19 -22
- udata/forms/widgets.py +9 -13
- udata/frontend/__init__.py +31 -26
- udata/frontend/csv.py +68 -58
- udata/frontend/markdown.py +40 -44
- udata/harvest/actions.py +89 -77
- udata/harvest/api.py +294 -238
- udata/harvest/backends/__init__.py +4 -4
- udata/harvest/backends/base.py +128 -111
- udata/harvest/backends/dcat.py +80 -66
- udata/harvest/commands.py +56 -60
- udata/harvest/csv.py +8 -8
- udata/harvest/exceptions.py +6 -3
- udata/harvest/filters.py +24 -23
- udata/harvest/forms.py +27 -28
- udata/harvest/models.py +88 -80
- udata/harvest/notifications.py +15 -10
- udata/harvest/signals.py +13 -13
- udata/harvest/tasks.py +11 -10
- udata/harvest/tests/factories.py +23 -24
- udata/harvest/tests/test_actions.py +136 -166
- udata/harvest/tests/test_api.py +220 -214
- udata/harvest/tests/test_base_backend.py +117 -112
- udata/harvest/tests/test_dcat_backend.py +380 -308
- udata/harvest/tests/test_filters.py +33 -22
- udata/harvest/tests/test_models.py +11 -14
- udata/harvest/tests/test_notifications.py +6 -7
- udata/harvest/tests/test_tasks.py +7 -6
- udata/i18n.py +237 -78
- udata/linkchecker/backends.py +5 -11
- udata/linkchecker/checker.py +23 -22
- udata/linkchecker/commands.py +4 -6
- udata/linkchecker/models.py +6 -6
- udata/linkchecker/tasks.py +18 -20
- udata/mail.py +21 -21
- udata/migrations/2020-07-24-remove-s-from-scope-oauth.py +9 -8
- udata/migrations/2020-08-24-add-fs-filename.py +9 -8
- udata/migrations/2020-09-28-update-reuses-datasets-metrics.py +5 -4
- udata/migrations/2020-10-16-migrate-ods-resources.py +9 -10
- udata/migrations/2021-04-08-update-schema-with-new-structure.py +8 -7
- udata/migrations/2021-05-27-fix-default-schema-name.py +7 -6
- udata/migrations/2021-07-05-remove-unused-badges.py +17 -15
- udata/migrations/2021-07-07-update-schema-for-community-resources.py +7 -6
- udata/migrations/2021-08-17-follow-integrity.py +5 -4
- udata/migrations/2021-08-17-harvest-integrity.py +13 -12
- udata/migrations/2021-08-17-oauth2client-integrity.py +5 -4
- udata/migrations/2021-08-17-transfer-integrity.py +5 -4
- udata/migrations/2021-08-17-users-integrity.py +9 -8
- udata/migrations/2021-12-14-reuse-topics.py +7 -6
- udata/migrations/2022-04-21-improve-extension-detection.py +8 -7
- udata/migrations/2022-09-22-clean-inactive-harvest-datasets.py +16 -14
- udata/migrations/2022-10-10-add-fs_uniquifier-to-user-model.py +6 -6
- udata/migrations/2022-10-10-migrate-harvest-extras.py +36 -26
- udata/migrations/2023-02-08-rename-internal-dates.py +46 -28
- udata/migrations/2024-01-29-fix-reuse-and-dataset-with-private-None.py +10 -8
- udata/migrations/2024-03-22-migrate-activity-kwargs-to-extras.py +6 -4
- udata/migrations/2024-06-11-fix-reuse-datasets-references.py +7 -6
- udata/migrations/__init__.py +123 -105
- udata/models/__init__.py +4 -4
- udata/mongo/__init__.py +13 -11
- udata/mongo/badges_field.py +3 -2
- udata/mongo/datetime_fields.py +13 -12
- udata/mongo/document.py +17 -16
- udata/mongo/engine.py +15 -16
- udata/mongo/errors.py +2 -1
- udata/mongo/extras_fields.py +30 -20
- udata/mongo/queryset.py +12 -12
- udata/mongo/slug_fields.py +38 -28
- udata/mongo/taglist_field.py +1 -2
- udata/mongo/url_field.py +5 -5
- udata/mongo/uuid_fields.py +4 -3
- udata/notifications/__init__.py +1 -1
- udata/notifications/mattermost.py +10 -9
- udata/rdf.py +167 -188
- udata/routing.py +40 -45
- udata/search/__init__.py +18 -19
- udata/search/adapter.py +17 -16
- udata/search/commands.py +44 -51
- udata/search/fields.py +13 -20
- udata/search/query.py +23 -18
- udata/search/result.py +9 -10
- udata/sentry.py +21 -19
- udata/settings.py +262 -198
- udata/sitemap.py +8 -6
- udata/static/chunks/{11.e9b9ca1f3e03d4020377.js → 11.52e531c19f8de80c00cf.js} +3 -3
- udata/static/chunks/{11.e9b9ca1f3e03d4020377.js.map → 11.52e531c19f8de80c00cf.js.map} +1 -1
- udata/static/chunks/{13.038c0d9aa0dfa0181c4b.js → 13.c3343a7f1070061c0e10.js} +2 -2
- udata/static/chunks/{13.038c0d9aa0dfa0181c4b.js.map → 13.c3343a7f1070061c0e10.js.map} +1 -1
- udata/static/chunks/{16.0baa2b64a74a2dcde25c.js → 16.8fa42440ad75ca172e6d.js} +2 -2
- udata/static/chunks/{16.0baa2b64a74a2dcde25c.js.map → 16.8fa42440ad75ca172e6d.js.map} +1 -1
- udata/static/chunks/{19.350a9f150b074b4ecefa.js → 19.9c6c8412729cd6d59cfa.js} +3 -3
- udata/static/chunks/{19.350a9f150b074b4ecefa.js.map → 19.9c6c8412729cd6d59cfa.js.map} +1 -1
- udata/static/chunks/{5.6ebbce2b9b3e696d3da5.js → 5.71d15c2e4f21feee2a9a.js} +3 -3
- udata/static/chunks/{5.6ebbce2b9b3e696d3da5.js.map → 5.71d15c2e4f21feee2a9a.js.map} +1 -1
- udata/static/chunks/{6.d8a5f7b017bcbd083641.js → 6.9139dc098b8ea640b890.js} +3 -3
- udata/static/chunks/{6.d8a5f7b017bcbd083641.js.map → 6.9139dc098b8ea640b890.js.map} +1 -1
- udata/static/common.js +1 -1
- udata/static/common.js.map +1 -1
- udata/storage/s3.py +20 -13
- udata/tags.py +4 -5
- udata/tasks.py +43 -42
- udata/tests/__init__.py +9 -6
- udata/tests/api/__init__.py +5 -6
- udata/tests/api/test_auth_api.py +395 -321
- udata/tests/api/test_base_api.py +31 -33
- udata/tests/api/test_contact_points.py +7 -9
- udata/tests/api/test_dataservices_api.py +211 -158
- udata/tests/api/test_datasets_api.py +823 -812
- udata/tests/api/test_follow_api.py +13 -15
- udata/tests/api/test_me_api.py +95 -112
- udata/tests/api/test_organizations_api.py +301 -339
- udata/tests/api/test_reports_api.py +35 -25
- udata/tests/api/test_reuses_api.py +134 -139
- udata/tests/api/test_swagger.py +5 -5
- udata/tests/api/test_tags_api.py +18 -25
- udata/tests/api/test_topics_api.py +94 -94
- udata/tests/api/test_transfer_api.py +53 -48
- udata/tests/api/test_user_api.py +128 -141
- udata/tests/apiv2/test_datasets.py +290 -198
- udata/tests/apiv2/test_me_api.py +10 -11
- udata/tests/apiv2/test_organizations.py +56 -74
- udata/tests/apiv2/test_swagger.py +5 -5
- udata/tests/apiv2/test_topics.py +69 -87
- udata/tests/cli/test_cli_base.py +8 -8
- udata/tests/cli/test_db_cli.py +21 -19
- udata/tests/dataservice/test_dataservice_tasks.py +8 -12
- udata/tests/dataset/test_csv_adapter.py +44 -35
- udata/tests/dataset/test_dataset_actions.py +2 -3
- udata/tests/dataset/test_dataset_commands.py +7 -8
- udata/tests/dataset/test_dataset_events.py +36 -29
- udata/tests/dataset/test_dataset_model.py +224 -217
- udata/tests/dataset/test_dataset_rdf.py +142 -131
- udata/tests/dataset/test_dataset_tasks.py +15 -15
- udata/tests/dataset/test_resource_preview.py +10 -13
- udata/tests/features/territories/__init__.py +9 -13
- udata/tests/features/territories/test_territories_api.py +71 -91
- udata/tests/forms/test_basic_fields.py +7 -7
- udata/tests/forms/test_current_user_field.py +39 -66
- udata/tests/forms/test_daterange_field.py +31 -39
- udata/tests/forms/test_dict_field.py +28 -26
- udata/tests/forms/test_extras_fields.py +102 -76
- udata/tests/forms/test_form_field.py +8 -8
- udata/tests/forms/test_image_field.py +33 -26
- udata/tests/forms/test_model_field.py +134 -123
- udata/tests/forms/test_model_list_field.py +7 -7
- udata/tests/forms/test_nested_model_list_field.py +117 -79
- udata/tests/forms/test_publish_as_field.py +36 -65
- udata/tests/forms/test_reference_field.py +34 -53
- udata/tests/forms/test_user_forms.py +23 -21
- udata/tests/forms/test_uuid_field.py +6 -10
- udata/tests/frontend/__init__.py +9 -6
- udata/tests/frontend/test_auth.py +7 -6
- udata/tests/frontend/test_csv.py +81 -96
- udata/tests/frontend/test_hooks.py +43 -43
- udata/tests/frontend/test_markdown.py +211 -191
- udata/tests/helpers.py +32 -37
- udata/tests/models.py +2 -2
- udata/tests/organization/test_csv_adapter.py +21 -16
- udata/tests/organization/test_notifications.py +11 -18
- udata/tests/organization/test_organization_model.py +13 -13
- udata/tests/organization/test_organization_rdf.py +29 -22
- udata/tests/organization/test_organization_tasks.py +16 -17
- udata/tests/plugin.py +76 -73
- udata/tests/reuse/test_reuse_model.py +21 -21
- udata/tests/reuse/test_reuse_task.py +11 -13
- udata/tests/search/__init__.py +11 -12
- udata/tests/search/test_adapter.py +60 -70
- udata/tests/search/test_query.py +16 -16
- udata/tests/search/test_results.py +10 -7
- udata/tests/site/test_site_api.py +11 -16
- udata/tests/site/test_site_metrics.py +20 -30
- udata/tests/site/test_site_model.py +4 -5
- udata/tests/site/test_site_rdf.py +94 -78
- udata/tests/test_activity.py +17 -17
- udata/tests/test_discussions.py +292 -299
- udata/tests/test_i18n.py +37 -40
- udata/tests/test_linkchecker.py +91 -85
- udata/tests/test_mail.py +13 -17
- udata/tests/test_migrations.py +219 -180
- udata/tests/test_model.py +164 -157
- udata/tests/test_notifications.py +17 -17
- udata/tests/test_owned.py +14 -14
- udata/tests/test_rdf.py +25 -23
- udata/tests/test_routing.py +89 -93
- udata/tests/test_storages.py +137 -128
- udata/tests/test_tags.py +44 -46
- udata/tests/test_topics.py +7 -7
- udata/tests/test_transfer.py +42 -49
- udata/tests/test_uris.py +160 -161
- udata/tests/test_utils.py +79 -71
- udata/tests/user/test_user_rdf.py +5 -9
- udata/tests/workers/test_jobs_commands.py +57 -58
- udata/tests/workers/test_tasks_routing.py +23 -29
- udata/tests/workers/test_workers_api.py +125 -131
- udata/tests/workers/test_workers_helpers.py +6 -6
- udata/tracking.py +4 -6
- udata/uris.py +45 -46
- udata/utils.py +68 -66
- udata/wsgi.py +1 -1
- {udata-9.1.2.dev30355.dist-info → udata-9.1.2.dev30382.dist-info}/METADATA +3 -2
- udata-9.1.2.dev30382.dist-info/RECORD +704 -0
- udata-9.1.2.dev30355.dist-info/RECORD +0 -704
- {udata-9.1.2.dev30355.dist-info → udata-9.1.2.dev30382.dist-info}/LICENSE +0 -0
- {udata-9.1.2.dev30355.dist-info → udata-9.1.2.dev30382.dist-info}/WHEEL +0 -0
- {udata-9.1.2.dev30355.dist-info → udata-9.1.2.dev30382.dist-info}/entry_points.txt +0 -0
- {udata-9.1.2.dev30355.dist-info → udata-9.1.2.dev30382.dist-info}/top_level.txt +0 -0
udata/core/dataset/rdf.py
CHANGED
|
@@ -1,47 +1,67 @@
|
|
|
1
|
-
|
|
1
|
+
"""
|
|
2
2
|
This module centralize dataset helpers for RDF/DCAT serialization and parsing
|
|
3
|
-
|
|
3
|
+
"""
|
|
4
|
+
|
|
4
5
|
import calendar
|
|
5
6
|
import json
|
|
6
7
|
import logging
|
|
7
|
-
|
|
8
8
|
from datetime import date
|
|
9
|
+
|
|
9
10
|
from dateutil.parser import parse as parse_dt
|
|
10
11
|
from flask import current_app
|
|
11
12
|
from geomet import wkt
|
|
12
|
-
from rdflib import Graph, URIRef, Literal, BNode
|
|
13
|
-
from rdflib.resource import Resource as RdfResource
|
|
14
|
-
from rdflib.namespace import RDF
|
|
15
13
|
from mongoengine.errors import ValidationError
|
|
14
|
+
from rdflib import BNode, Graph, Literal, URIRef
|
|
15
|
+
from rdflib.namespace import RDF
|
|
16
|
+
from rdflib.resource import Resource as RdfResource
|
|
16
17
|
|
|
17
18
|
from udata import i18n, uris
|
|
18
|
-
from udata.core.spatial.models import SpatialCoverage
|
|
19
19
|
from udata.core.dataset.models import HarvestDatasetMetadata, HarvestResourceMetadata
|
|
20
|
+
from udata.core.spatial.models import SpatialCoverage
|
|
20
21
|
from udata.harvest.exceptions import HarvestSkipException
|
|
21
22
|
from udata.models import db
|
|
22
23
|
from udata.rdf import (
|
|
23
|
-
DCAT,
|
|
24
|
-
|
|
24
|
+
DCAT,
|
|
25
|
+
DCATAP,
|
|
26
|
+
DCT,
|
|
27
|
+
EUFORMAT,
|
|
28
|
+
EUFREQ,
|
|
29
|
+
FREQ,
|
|
30
|
+
HVD_LEGISLATION,
|
|
31
|
+
IANAFORMAT,
|
|
32
|
+
RDFS,
|
|
33
|
+
SCHEMA,
|
|
34
|
+
SCV,
|
|
35
|
+
SKOS,
|
|
36
|
+
SPDX,
|
|
37
|
+
TAG_TO_EU_HVD_CATEGORIES,
|
|
25
38
|
contact_point_from_rdf,
|
|
39
|
+
namespace_manager,
|
|
40
|
+
rdf_value,
|
|
41
|
+
remote_url_from_rdf,
|
|
42
|
+
sanitize_html,
|
|
43
|
+
schema_from_rdf,
|
|
44
|
+
themes_from_rdf,
|
|
45
|
+
url_from_rdf,
|
|
26
46
|
)
|
|
27
|
-
from udata.utils import get_by, safe_unicode
|
|
28
47
|
from udata.uris import endpoint_for
|
|
48
|
+
from udata.utils import get_by, safe_unicode
|
|
29
49
|
|
|
30
|
-
from .models import Dataset, Resource, Checksum, License
|
|
31
50
|
from .constants import UPDATE_FREQUENCIES
|
|
51
|
+
from .models import Checksum, Dataset, License, Resource
|
|
32
52
|
|
|
33
53
|
log = logging.getLogger(__name__)
|
|
34
54
|
|
|
35
55
|
# Map extra frequencies (ie. not defined in Dublin Core) to closest equivalent
|
|
36
56
|
RDF_FREQUENCIES = {
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
57
|
+
"punctual": None,
|
|
58
|
+
"hourly": FREQ.continuous,
|
|
59
|
+
"fourTimesADay": FREQ.daily,
|
|
60
|
+
"threeTimesADay": FREQ.daily,
|
|
61
|
+
"semidaily": FREQ.daily,
|
|
62
|
+
"fourTimesAWeek": FREQ.threeTimesAWeek,
|
|
63
|
+
"quinquennial": None,
|
|
64
|
+
"unknown": None,
|
|
45
65
|
}
|
|
46
66
|
|
|
47
67
|
# Map european frequencies to their closest equivalent
|
|
@@ -50,31 +70,32 @@ RDF_FREQUENCIES = {
|
|
|
50
70
|
# - https://publications.europa.eu/en/web/eu-vocabularies/at-dataset/-/resource/dataset/frequency # noqa: E501
|
|
51
71
|
EU_RDF_REQUENCIES = {
|
|
52
72
|
# Match Dublin Core name
|
|
53
|
-
EUFREQ.ANNUAL:
|
|
54
|
-
EUFREQ.BIENNIAL:
|
|
55
|
-
EUFREQ.TRIENNIAL:
|
|
56
|
-
EUFREQ.QUARTERLY:
|
|
57
|
-
EUFREQ.MONTHLY:
|
|
58
|
-
EUFREQ.BIMONTHLY:
|
|
59
|
-
EUFREQ.WEEKLY:
|
|
60
|
-
EUFREQ.BIWEEKLY:
|
|
61
|
-
EUFREQ.DAILY:
|
|
73
|
+
EUFREQ.ANNUAL: "annual",
|
|
74
|
+
EUFREQ.BIENNIAL: "biennial",
|
|
75
|
+
EUFREQ.TRIENNIAL: "triennial",
|
|
76
|
+
EUFREQ.QUARTERLY: "quarterly",
|
|
77
|
+
EUFREQ.MONTHLY: "monthly",
|
|
78
|
+
EUFREQ.BIMONTHLY: "bimonthly",
|
|
79
|
+
EUFREQ.WEEKLY: "weekly",
|
|
80
|
+
EUFREQ.BIWEEKLY: "biweekly",
|
|
81
|
+
EUFREQ.DAILY: "daily",
|
|
62
82
|
# Name differs from Dublin Core
|
|
63
|
-
EUFREQ.ANNUAL_2:
|
|
64
|
-
EUFREQ.ANNUAL_3:
|
|
65
|
-
EUFREQ.MONTHLY_2:
|
|
66
|
-
EUFREQ.MONTHLY_3:
|
|
67
|
-
EUFREQ.WEEKLY_2:
|
|
68
|
-
EUFREQ.WEEKLY_3:
|
|
69
|
-
EUFREQ.DAILY_2:
|
|
70
|
-
EUFREQ.CONT:
|
|
71
|
-
EUFREQ.UPDATE_CONT:
|
|
72
|
-
EUFREQ.IRREG:
|
|
73
|
-
EUFREQ.UNKNOWN:
|
|
74
|
-
EUFREQ.OTHER:
|
|
75
|
-
EUFREQ.NEVER:
|
|
83
|
+
EUFREQ.ANNUAL_2: "semiannual",
|
|
84
|
+
EUFREQ.ANNUAL_3: "threeTimesAYear",
|
|
85
|
+
EUFREQ.MONTHLY_2: "semimonthly",
|
|
86
|
+
EUFREQ.MONTHLY_3: "threeTimesAMonth",
|
|
87
|
+
EUFREQ.WEEKLY_2: "semiweekly",
|
|
88
|
+
EUFREQ.WEEKLY_3: "threeTimesAWeek",
|
|
89
|
+
EUFREQ.DAILY_2: "semidaily",
|
|
90
|
+
EUFREQ.CONT: "continuous",
|
|
91
|
+
EUFREQ.UPDATE_CONT: "continuous",
|
|
92
|
+
EUFREQ.IRREG: "irregular",
|
|
93
|
+
EUFREQ.UNKNOWN: "unknown",
|
|
94
|
+
EUFREQ.OTHER: "unknown",
|
|
95
|
+
EUFREQ.NEVER: "punctual",
|
|
76
96
|
}
|
|
77
97
|
|
|
98
|
+
|
|
78
99
|
def temporal_to_rdf(daterange, graph=None):
|
|
79
100
|
if not daterange:
|
|
80
101
|
return
|
|
@@ -104,18 +125,25 @@ def owner_to_rdf(dataset, graph=None):
|
|
|
104
125
|
|
|
105
126
|
|
|
106
127
|
def resource_to_rdf(resource, dataset=None, graph=None, is_hvd=False):
|
|
107
|
-
|
|
128
|
+
"""
|
|
108
129
|
Map a Resource domain model to a DCAT/RDF graph
|
|
109
|
-
|
|
130
|
+
"""
|
|
110
131
|
graph = graph or Graph(namespace_manager=namespace_manager)
|
|
111
132
|
if dataset and dataset.id:
|
|
112
|
-
id = URIRef(
|
|
113
|
-
|
|
114
|
-
|
|
133
|
+
id = URIRef(
|
|
134
|
+
endpoint_for(
|
|
135
|
+
"datasets.show_redirect",
|
|
136
|
+
"api.dataset",
|
|
137
|
+
dataset=dataset.id,
|
|
138
|
+
_external=True,
|
|
139
|
+
_anchor="resource-{0}".format(resource.id),
|
|
140
|
+
)
|
|
141
|
+
)
|
|
115
142
|
else:
|
|
116
143
|
id = BNode(resource.id)
|
|
117
|
-
permalink = endpoint_for(
|
|
118
|
-
|
|
144
|
+
permalink = endpoint_for(
|
|
145
|
+
"datasets.resource", "api.resource_redirect", id=resource.id, _external=True
|
|
146
|
+
)
|
|
119
147
|
r = graph.resource(id)
|
|
120
148
|
r.set(RDF.type, DCAT.Distribution)
|
|
121
149
|
r.set(DCT.identifier, Literal(resource.id))
|
|
@@ -138,7 +166,7 @@ def resource_to_rdf(resource, dataset=None, graph=None, is_hvd=False):
|
|
|
138
166
|
if resource.checksum:
|
|
139
167
|
checksum = graph.resource(BNode())
|
|
140
168
|
checksum.set(RDF.type, SPDX.Checksum)
|
|
141
|
-
algorithm =
|
|
169
|
+
algorithm = "checksumAlgorithm_{0}".format(resource.checksum.type)
|
|
142
170
|
checksum.add(SPDX.algorithm, getattr(SPDX, algorithm))
|
|
143
171
|
checksum.add(SPDX.checksumValue, Literal(resource.checksum.value))
|
|
144
172
|
r.add(SPDX.checksum, checksum)
|
|
@@ -148,21 +176,25 @@ def resource_to_rdf(resource, dataset=None, graph=None, is_hvd=False):
|
|
|
148
176
|
return r
|
|
149
177
|
|
|
150
178
|
|
|
151
|
-
def dataset_to_graph_id(dataset: Dataset) -> URIRef | BNode:
|
|
179
|
+
def dataset_to_graph_id(dataset: Dataset) -> URIRef | BNode:
|
|
152
180
|
if dataset.harvest and dataset.harvest.uri:
|
|
153
181
|
return URIRef(dataset.harvest.uri)
|
|
154
182
|
elif dataset.id:
|
|
155
|
-
return URIRef(
|
|
156
|
-
|
|
183
|
+
return URIRef(
|
|
184
|
+
endpoint_for(
|
|
185
|
+
"datasets.show_redirect", "api.dataset", dataset=dataset.id, _external=True
|
|
186
|
+
)
|
|
187
|
+
)
|
|
157
188
|
else:
|
|
158
189
|
# Should not happen in production. Some test only
|
|
159
190
|
# `build()` a dataset without saving it to the DB.
|
|
160
191
|
return BNode()
|
|
161
192
|
|
|
193
|
+
|
|
162
194
|
def dataset_to_rdf(dataset, graph=None):
|
|
163
|
-
|
|
195
|
+
"""
|
|
164
196
|
Map a dataset domain model to a DCAT/RDF graph
|
|
165
|
-
|
|
197
|
+
"""
|
|
166
198
|
# Use the unlocalized permalink to the dataset as URI when available
|
|
167
199
|
# unless there is already an upstream URI
|
|
168
200
|
id = dataset_to_graph_id(dataset)
|
|
@@ -187,7 +219,7 @@ def dataset_to_rdf(dataset, graph=None):
|
|
|
187
219
|
|
|
188
220
|
# Add DCAT-AP HVD properties if the dataset is tagged hvd.
|
|
189
221
|
# See https://semiceu.github.io/DCAT-AP/releases/2.2.0-hvd/
|
|
190
|
-
is_hvd = current_app.config[
|
|
222
|
+
is_hvd = current_app.config["HVD_SUPPORT"] and "hvd" in dataset.tags
|
|
191
223
|
if is_hvd:
|
|
192
224
|
d.add(DCATAP.applicableLegislation, URIRef(HVD_LEGISLATION))
|
|
193
225
|
|
|
@@ -215,52 +247,46 @@ def dataset_to_rdf(dataset, graph=None):
|
|
|
215
247
|
|
|
216
248
|
|
|
217
249
|
CHECKSUM_ALGORITHMS = {
|
|
218
|
-
SPDX.checksumAlgorithm_md5:
|
|
219
|
-
SPDX.checksumAlgorithm_sha1:
|
|
220
|
-
SPDX.checksumAlgorithm_sha256:
|
|
250
|
+
SPDX.checksumAlgorithm_md5: "md5",
|
|
251
|
+
SPDX.checksumAlgorithm_sha1: "sha1",
|
|
252
|
+
SPDX.checksumAlgorithm_sha256: "sha256",
|
|
221
253
|
}
|
|
222
254
|
|
|
223
255
|
|
|
224
256
|
def temporal_from_literal(text):
|
|
225
|
-
|
|
257
|
+
"""
|
|
226
258
|
Parse a temporal coverage from a literal ie. either:
|
|
227
259
|
- an ISO date range
|
|
228
260
|
- a single ISO date period (month,year)
|
|
229
|
-
|
|
230
|
-
if text.count(
|
|
261
|
+
"""
|
|
262
|
+
if text.count("/") == 1:
|
|
231
263
|
# This is an ISO date range as preconized by Gov.uk
|
|
232
264
|
# http://guidance.data.gov.uk/dcat_fields.html
|
|
233
|
-
start, end = text.split(
|
|
234
|
-
return db.DateRange(
|
|
235
|
-
start=parse_dt(start).date(),
|
|
236
|
-
end=parse_dt(end).date()
|
|
237
|
-
)
|
|
265
|
+
start, end = text.split("/")
|
|
266
|
+
return db.DateRange(start=parse_dt(start).date(), end=parse_dt(end).date())
|
|
238
267
|
else:
|
|
239
|
-
separators = text.count(
|
|
268
|
+
separators = text.count("-")
|
|
240
269
|
if separators == 0:
|
|
241
270
|
# this is a year
|
|
242
|
-
return db.DateRange(
|
|
243
|
-
start=date(int(text), 1, 1),
|
|
244
|
-
end=date(int(text), 12, 31)
|
|
245
|
-
)
|
|
271
|
+
return db.DateRange(start=date(int(text), 1, 1), end=date(int(text), 12, 31))
|
|
246
272
|
elif separators == 1:
|
|
247
273
|
# this is a month
|
|
248
274
|
dt = parse_dt(text).date()
|
|
249
275
|
return db.DateRange(
|
|
250
276
|
start=dt.replace(day=1),
|
|
251
|
-
end=dt.replace(day=calendar.monthrange(dt.year, dt.month)[1])
|
|
277
|
+
end=dt.replace(day=calendar.monthrange(dt.year, dt.month)[1]),
|
|
252
278
|
)
|
|
253
279
|
|
|
254
280
|
|
|
255
281
|
def temporal_from_resource(resource):
|
|
256
|
-
|
|
282
|
+
"""
|
|
257
283
|
Parse a temporal coverage from a RDF class/resource ie. either:
|
|
258
284
|
- a `dct:PeriodOfTime` with schema.org `startDate` and `endDate` properties
|
|
259
285
|
- a `dct:PeriodOfTime` with DCAT `startDate` and `endDate` properties
|
|
260
286
|
- an inline gov.uk Time Interval value
|
|
261
287
|
- an URI reference to a gov.uk Time Interval ontology
|
|
262
288
|
http://reference.data.gov.uk/
|
|
263
|
-
|
|
289
|
+
"""
|
|
264
290
|
if isinstance(resource.identifier, URIRef):
|
|
265
291
|
# Fetch remote ontology if necessary
|
|
266
292
|
g = Graph().parse(str(resource.identifier))
|
|
@@ -268,22 +294,21 @@ def temporal_from_resource(resource):
|
|
|
268
294
|
if resource.value(SCHEMA.startDate):
|
|
269
295
|
return db.DateRange(
|
|
270
296
|
start=resource.value(SCHEMA.startDate).toPython(),
|
|
271
|
-
end=resource.value(SCHEMA.endDate).toPython()
|
|
297
|
+
end=resource.value(SCHEMA.endDate).toPython(),
|
|
272
298
|
)
|
|
273
299
|
elif resource.value(DCAT.startDate):
|
|
274
300
|
return db.DateRange(
|
|
275
301
|
start=resource.value(DCAT.startDate).toPython(),
|
|
276
|
-
end=resource.value(DCAT.endDate).toPython()
|
|
302
|
+
end=resource.value(DCAT.endDate).toPython(),
|
|
277
303
|
)
|
|
278
304
|
elif resource.value(SCV.min):
|
|
279
305
|
return db.DateRange(
|
|
280
|
-
start=resource.value(SCV.min).toPython(),
|
|
281
|
-
end=resource.value(SCV.max).toPython()
|
|
306
|
+
start=resource.value(SCV.min).toPython(), end=resource.value(SCV.max).toPython()
|
|
282
307
|
)
|
|
283
308
|
|
|
284
309
|
|
|
285
310
|
def temporal_from_rdf(period_of_time):
|
|
286
|
-
|
|
311
|
+
"""Failsafe parsing of a temporal coverage"""
|
|
287
312
|
try:
|
|
288
313
|
if isinstance(period_of_time, Literal):
|
|
289
314
|
return temporal_from_literal(str(period_of_time))
|
|
@@ -293,30 +318,34 @@ def temporal_from_rdf(period_of_time):
|
|
|
293
318
|
# There are a lot of cases where parsing could/should fail
|
|
294
319
|
# but we never want to break the whole dataset parsing
|
|
295
320
|
# so we log the error for future investigation and improvement
|
|
296
|
-
log.warning(
|
|
321
|
+
log.warning("Unable to parse temporal coverage", exc_info=True)
|
|
322
|
+
|
|
297
323
|
|
|
298
324
|
def spatial_from_rdf(graph):
|
|
299
325
|
geojsons = []
|
|
300
326
|
for term in graph.objects(DCT.spatial):
|
|
301
327
|
try:
|
|
302
|
-
# This may not be official in the norm but some ArcGis return
|
|
328
|
+
# This may not be official in the norm but some ArcGis return
|
|
303
329
|
# bbox as literal directly in DCT.spatial.
|
|
304
330
|
if isinstance(term, Literal):
|
|
305
331
|
geojson = bbox_to_geojson_multipolygon(term.toPython())
|
|
306
332
|
if geojson is not None:
|
|
307
333
|
geojsons.append(geojson)
|
|
308
|
-
|
|
334
|
+
|
|
309
335
|
continue
|
|
310
336
|
|
|
311
337
|
for object in term.objects():
|
|
312
338
|
if isinstance(object, Literal):
|
|
313
|
-
if
|
|
339
|
+
if (
|
|
340
|
+
object.datatype.__str__()
|
|
341
|
+
== "https://www.iana.org/assignments/media-types/application/vnd.geo+json"
|
|
342
|
+
):
|
|
314
343
|
try:
|
|
315
344
|
geojson = json.loads(object.toPython())
|
|
316
345
|
except ValueError as e:
|
|
317
346
|
log.warning(f"Invalid JSON in spatial GeoJSON {object.toPython()} {e}")
|
|
318
347
|
continue
|
|
319
|
-
elif object.datatype.__str__() ==
|
|
348
|
+
elif object.datatype.__str__() == "http://www.opengis.net/rdf#wktLiteral":
|
|
320
349
|
try:
|
|
321
350
|
# .upper() si here because geomet doesn't support Polygon but only POLYGON
|
|
322
351
|
geojson = wkt.loads(object.toPython().strip().upper())
|
|
@@ -328,7 +357,9 @@ def spatial_from_rdf(graph):
|
|
|
328
357
|
|
|
329
358
|
geojsons.append(geojson)
|
|
330
359
|
except Exception as e:
|
|
331
|
-
log.exception(
|
|
360
|
+
log.exception(
|
|
361
|
+
f"Exception during `spatial_from_rdf` for term {term}: {e}", stack_info=True
|
|
362
|
+
)
|
|
332
363
|
|
|
333
364
|
if not geojsons:
|
|
334
365
|
return None
|
|
@@ -339,16 +370,16 @@ def spatial_from_rdf(graph):
|
|
|
339
370
|
# if there are other types of spatial coverage worth integrating (points? line strings?). But these other
|
|
340
371
|
# formats are not compatible to be merged in the unique stored representation in MongoDB, we'll deal with them in a second pass.
|
|
341
372
|
# The merging lose the properties and other information inside the GeoJSON…
|
|
342
|
-
# Note that having multiple `Polygon` is not really the DCAT way of doing things, the standard require that you use
|
|
373
|
+
# Note that having multiple `Polygon` is not really the DCAT way of doing things, the standard require that you use
|
|
343
374
|
# a `MultiPolygon` in this case. We support this right now, and wait and see if it raises problems in the future for
|
|
344
375
|
# people following the standard. (see https://github.com/datagouv/data.gouv.fr/issues/1362#issuecomment-2112774115)
|
|
345
376
|
polygons = []
|
|
346
377
|
for geojson in geojsons:
|
|
347
|
-
if geojson[
|
|
348
|
-
if geojson[
|
|
349
|
-
polygons.append(geojson[
|
|
350
|
-
elif geojson[
|
|
351
|
-
for coordinates in geojson[
|
|
378
|
+
if geojson["type"] == "Polygon":
|
|
379
|
+
if geojson["coordinates"] not in polygons:
|
|
380
|
+
polygons.append(geojson["coordinates"])
|
|
381
|
+
elif geojson["type"] == "MultiPolygon":
|
|
382
|
+
for coordinates in geojson["coordinates"]:
|
|
352
383
|
if coordinates not in polygons:
|
|
353
384
|
polygons.append(coordinates)
|
|
354
385
|
else:
|
|
@@ -359,10 +390,12 @@ def spatial_from_rdf(graph):
|
|
|
359
390
|
log.warning(f"No supported types found in the GeoJSON data.")
|
|
360
391
|
return None
|
|
361
392
|
|
|
362
|
-
spatial_coverage = SpatialCoverage(
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
393
|
+
spatial_coverage = SpatialCoverage(
|
|
394
|
+
geom={
|
|
395
|
+
"type": "MultiPolygon",
|
|
396
|
+
"coordinates": polygons,
|
|
397
|
+
}
|
|
398
|
+
)
|
|
366
399
|
|
|
367
400
|
try:
|
|
368
401
|
spatial_coverage.clean()
|
|
@@ -397,7 +430,7 @@ def mime_from_rdf(resource):
|
|
|
397
430
|
if not mime:
|
|
398
431
|
return
|
|
399
432
|
if IANAFORMAT in mime:
|
|
400
|
-
return
|
|
433
|
+
return "/".join(mime.split("/")[-2:])
|
|
401
434
|
if isinstance(mime, str):
|
|
402
435
|
return mime
|
|
403
436
|
|
|
@@ -413,36 +446,36 @@ def format_from_rdf(resource):
|
|
|
413
446
|
|
|
414
447
|
|
|
415
448
|
def title_from_rdf(rdf, url):
|
|
416
|
-
|
|
449
|
+
"""
|
|
417
450
|
Try to extract a distribution title from a property.
|
|
418
451
|
As it's not a mandatory property,
|
|
419
452
|
it fallback on building a title from the URL
|
|
420
453
|
then the format and in last ressort a generic resource name.
|
|
421
|
-
|
|
454
|
+
"""
|
|
422
455
|
title = rdf_value(rdf, DCT.title)
|
|
423
456
|
if title:
|
|
424
457
|
return title
|
|
425
458
|
if url:
|
|
426
|
-
last_part = url.split(
|
|
427
|
-
if
|
|
459
|
+
last_part = url.split("/")[-1]
|
|
460
|
+
if "." in last_part and "?" not in last_part:
|
|
428
461
|
return last_part
|
|
429
462
|
fmt = rdf_value(rdf, DCT.format)
|
|
430
|
-
lang = current_app.config[
|
|
463
|
+
lang = current_app.config["DEFAULT_LANGUAGE"]
|
|
431
464
|
with i18n.language(lang):
|
|
432
465
|
if fmt:
|
|
433
|
-
return i18n._(
|
|
466
|
+
return i18n._("{format} resource").format(format=fmt.lower())
|
|
434
467
|
else:
|
|
435
|
-
return i18n._(
|
|
468
|
+
return i18n._("Nameless resource")
|
|
469
|
+
|
|
436
470
|
|
|
437
471
|
def resource_from_rdf(graph_or_distrib, dataset=None, is_additionnal=False):
|
|
438
|
-
|
|
472
|
+
"""
|
|
439
473
|
Map a Resource domain model to a DCAT/RDF graph
|
|
440
|
-
|
|
474
|
+
"""
|
|
441
475
|
if isinstance(graph_or_distrib, RdfResource):
|
|
442
476
|
distrib = graph_or_distrib
|
|
443
477
|
else:
|
|
444
|
-
node = graph_or_distrib.value(predicate=RDF.type,
|
|
445
|
-
object=DCAT.Distribution)
|
|
478
|
+
node = graph_or_distrib.value(predicate=RDF.type, object=DCAT.Distribution)
|
|
446
479
|
distrib = graph_or_distrib.resource(node)
|
|
447
480
|
|
|
448
481
|
if not is_additionnal:
|
|
@@ -453,16 +486,16 @@ def resource_from_rdf(graph_or_distrib, dataset=None, is_additionnal=False):
|
|
|
453
486
|
url = distrib.identifier.toPython() if isinstance(distrib.identifier, URIRef) else None
|
|
454
487
|
# we shouldn't create resources without URLs
|
|
455
488
|
if not url:
|
|
456
|
-
log.warning(f
|
|
489
|
+
log.warning(f"Resource without url: {distrib}")
|
|
457
490
|
return
|
|
458
491
|
|
|
459
492
|
if dataset:
|
|
460
|
-
resource = get_by(dataset.resources,
|
|
493
|
+
resource = get_by(dataset.resources, "url", url)
|
|
461
494
|
if not dataset or not resource:
|
|
462
495
|
resource = Resource()
|
|
463
496
|
if dataset:
|
|
464
497
|
dataset.resources.append(resource)
|
|
465
|
-
resource.filetype =
|
|
498
|
+
resource.filetype = "remote"
|
|
466
499
|
resource.title = title_from_rdf(distrib, url)
|
|
467
500
|
resource.url = url
|
|
468
501
|
resource.description = sanitize_html(distrib.value(DCT.description))
|
|
@@ -482,7 +515,7 @@ def resource_from_rdf(graph_or_distrib, dataset=None, is_additionnal=False):
|
|
|
482
515
|
resource.checksum.value = rdf_value(checksum, SPDX.checksumValue)
|
|
483
516
|
resource.checksum.type = algorithm
|
|
484
517
|
if is_additionnal:
|
|
485
|
-
resource.type =
|
|
518
|
+
resource.type = "other"
|
|
486
519
|
|
|
487
520
|
identifier = rdf_value(distrib, DCT.identifier)
|
|
488
521
|
uri = distrib.identifier.toPython() if isinstance(distrib.identifier, URIRef) else None
|
|
@@ -500,9 +533,9 @@ def resource_from_rdf(graph_or_distrib, dataset=None, is_additionnal=False):
|
|
|
500
533
|
|
|
501
534
|
|
|
502
535
|
def dataset_from_rdf(graph: Graph, dataset=None, node=None):
|
|
503
|
-
|
|
536
|
+
"""
|
|
504
537
|
Create or update a dataset from a RDF/DCAT graph
|
|
505
|
-
|
|
538
|
+
"""
|
|
506
539
|
dataset = dataset or Dataset()
|
|
507
540
|
|
|
508
541
|
if node is None: # Assume first match is the only match
|
|
@@ -542,13 +575,13 @@ def dataset_from_rdf(graph: Graph, dataset=None, node=None):
|
|
|
542
575
|
if access_rights:
|
|
543
576
|
dataset.extras["harvest"] = {
|
|
544
577
|
"dct:accessRights": access_rights,
|
|
545
|
-
**dataset.extras.get("harvest", {})
|
|
578
|
+
**dataset.extras.get("harvest", {}),
|
|
546
579
|
}
|
|
547
580
|
provenance = [p.value(RDFS.label) for p in d.objects(DCT.provenance)]
|
|
548
581
|
if provenance:
|
|
549
582
|
dataset.extras["harvest"] = {
|
|
550
583
|
"dct:provenance": provenance,
|
|
551
|
-
**dataset.extras.get("harvest", {})
|
|
584
|
+
**dataset.extras.get("harvest", {}),
|
|
552
585
|
}
|
|
553
586
|
|
|
554
587
|
licenses = set()
|
|
@@ -584,11 +617,12 @@ def dataset_from_rdf(graph: Graph, dataset=None, node=None):
|
|
|
584
617
|
|
|
585
618
|
return dataset
|
|
586
619
|
|
|
587
|
-
|
|
588
|
-
|
|
620
|
+
|
|
621
|
+
def bbox_to_geojson_multipolygon(bbox_as_str: str) -> dict | None:
|
|
622
|
+
bbox = bbox_as_str.strip().split(",")
|
|
589
623
|
if len(bbox) != 4:
|
|
590
624
|
return None
|
|
591
|
-
|
|
625
|
+
|
|
592
626
|
west = float(bbox[0])
|
|
593
627
|
south = float(bbox[1])
|
|
594
628
|
east = float(bbox[2])
|
|
@@ -600,10 +634,10 @@ def bbox_to_geojson_multipolygon(bbox_as_str: str) -> dict | None:
|
|
|
600
634
|
low_right = [east, south]
|
|
601
635
|
|
|
602
636
|
return {
|
|
603
|
-
|
|
604
|
-
|
|
637
|
+
"type": "MultiPolygon",
|
|
638
|
+
"coordinates": [
|
|
605
639
|
[
|
|
606
640
|
[low_left, low_right, top_right, top_left, low_left],
|
|
607
|
-
],
|
|
641
|
+
],
|
|
608
642
|
],
|
|
609
|
-
}
|
|
643
|
+
}
|