udata 9.1.2.dev30355__py2.py3-none-any.whl → 9.1.2.dev30382__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of udata might be problematic. Click here for more details.
- tasks/__init__.py +109 -107
- tasks/helpers.py +18 -18
- udata/__init__.py +4 -4
- udata/admin/views.py +5 -5
- udata/api/__init__.py +135 -124
- udata/api/commands.py +45 -37
- udata/api/errors.py +5 -4
- udata/api/fields.py +23 -21
- udata/api/oauth2.py +55 -74
- udata/api/parsers.py +15 -15
- udata/api/signals.py +1 -1
- udata/api_fields.py +137 -89
- udata/app.py +56 -54
- udata/assets.py +5 -5
- udata/auth/__init__.py +37 -26
- udata/auth/forms.py +23 -15
- udata/auth/helpers.py +1 -1
- udata/auth/mails.py +3 -3
- udata/auth/password_validation.py +19 -15
- udata/auth/views.py +94 -68
- udata/commands/__init__.py +71 -69
- udata/commands/cache.py +7 -7
- udata/commands/db.py +201 -140
- udata/commands/dcat.py +36 -30
- udata/commands/fixtures.py +100 -84
- udata/commands/images.py +21 -20
- udata/commands/info.py +17 -20
- udata/commands/init.py +10 -10
- udata/commands/purge.py +12 -13
- udata/commands/serve.py +41 -29
- udata/commands/static.py +16 -18
- udata/commands/test.py +20 -20
- udata/commands/tests/fixtures.py +26 -24
- udata/commands/worker.py +31 -33
- udata/core/__init__.py +12 -12
- udata/core/activity/__init__.py +0 -1
- udata/core/activity/api.py +59 -49
- udata/core/activity/models.py +28 -26
- udata/core/activity/signals.py +1 -1
- udata/core/activity/tasks.py +16 -10
- udata/core/badges/api.py +6 -6
- udata/core/badges/commands.py +14 -13
- udata/core/badges/fields.py +8 -5
- udata/core/badges/forms.py +7 -4
- udata/core/badges/models.py +16 -31
- udata/core/badges/permissions.py +1 -3
- udata/core/badges/signals.py +2 -2
- udata/core/badges/tasks.py +3 -2
- udata/core/badges/tests/test_commands.py +10 -10
- udata/core/badges/tests/test_model.py +24 -31
- udata/core/contact_point/api.py +19 -18
- udata/core/contact_point/api_fields.py +21 -14
- udata/core/contact_point/factories.py +2 -2
- udata/core/contact_point/forms.py +7 -6
- udata/core/contact_point/models.py +3 -5
- udata/core/dataservices/api.py +26 -21
- udata/core/dataservices/factories.py +13 -11
- udata/core/dataservices/models.py +35 -40
- udata/core/dataservices/permissions.py +4 -4
- udata/core/dataservices/rdf.py +40 -17
- udata/core/dataservices/tasks.py +4 -3
- udata/core/dataset/actions.py +10 -10
- udata/core/dataset/activities.py +21 -23
- udata/core/dataset/api.py +321 -298
- udata/core/dataset/api_fields.py +443 -271
- udata/core/dataset/apiv2.py +305 -229
- udata/core/dataset/commands.py +38 -36
- udata/core/dataset/constants.py +61 -54
- udata/core/dataset/csv.py +70 -74
- udata/core/dataset/events.py +39 -32
- udata/core/dataset/exceptions.py +8 -4
- udata/core/dataset/factories.py +57 -65
- udata/core/dataset/forms.py +87 -63
- udata/core/dataset/models.py +336 -280
- udata/core/dataset/permissions.py +9 -6
- udata/core/dataset/preview.py +15 -17
- udata/core/dataset/rdf.py +156 -122
- udata/core/dataset/search.py +92 -77
- udata/core/dataset/signals.py +1 -1
- udata/core/dataset/tasks.py +63 -54
- udata/core/discussions/actions.py +5 -5
- udata/core/discussions/api.py +124 -120
- udata/core/discussions/factories.py +2 -2
- udata/core/discussions/forms.py +9 -7
- udata/core/discussions/metrics.py +1 -3
- udata/core/discussions/models.py +25 -24
- udata/core/discussions/notifications.py +18 -14
- udata/core/discussions/permissions.py +3 -3
- udata/core/discussions/signals.py +4 -4
- udata/core/discussions/tasks.py +24 -28
- udata/core/followers/api.py +32 -33
- udata/core/followers/models.py +9 -9
- udata/core/followers/signals.py +3 -3
- udata/core/jobs/actions.py +7 -7
- udata/core/jobs/api.py +99 -92
- udata/core/jobs/commands.py +48 -49
- udata/core/jobs/forms.py +11 -11
- udata/core/jobs/models.py +6 -6
- udata/core/metrics/__init__.py +2 -2
- udata/core/metrics/commands.py +34 -30
- udata/core/metrics/models.py +2 -4
- udata/core/metrics/signals.py +1 -1
- udata/core/metrics/tasks.py +3 -3
- udata/core/organization/activities.py +12 -15
- udata/core/organization/api.py +167 -174
- udata/core/organization/api_fields.py +183 -124
- udata/core/organization/apiv2.py +32 -32
- udata/core/organization/commands.py +20 -22
- udata/core/organization/constants.py +11 -11
- udata/core/organization/csv.py +17 -15
- udata/core/organization/factories.py +8 -11
- udata/core/organization/forms.py +32 -26
- udata/core/organization/metrics.py +2 -1
- udata/core/organization/models.py +87 -67
- udata/core/organization/notifications.py +18 -14
- udata/core/organization/permissions.py +10 -11
- udata/core/organization/rdf.py +14 -14
- udata/core/organization/search.py +30 -28
- udata/core/organization/signals.py +7 -7
- udata/core/organization/tasks.py +42 -61
- udata/core/owned.py +38 -27
- udata/core/post/api.py +82 -81
- udata/core/post/constants.py +8 -5
- udata/core/post/factories.py +4 -4
- udata/core/post/forms.py +13 -14
- udata/core/post/models.py +20 -22
- udata/core/post/tests/test_api.py +30 -32
- udata/core/reports/api.py +8 -7
- udata/core/reports/constants.py +1 -3
- udata/core/reports/models.py +10 -10
- udata/core/reuse/activities.py +15 -19
- udata/core/reuse/api.py +123 -126
- udata/core/reuse/api_fields.py +120 -85
- udata/core/reuse/apiv2.py +11 -10
- udata/core/reuse/constants.py +23 -23
- udata/core/reuse/csv.py +18 -18
- udata/core/reuse/factories.py +5 -9
- udata/core/reuse/forms.py +24 -21
- udata/core/reuse/models.py +55 -51
- udata/core/reuse/permissions.py +2 -2
- udata/core/reuse/search.py +49 -46
- udata/core/reuse/signals.py +1 -1
- udata/core/reuse/tasks.py +4 -5
- udata/core/site/api.py +47 -50
- udata/core/site/factories.py +2 -2
- udata/core/site/forms.py +4 -5
- udata/core/site/models.py +94 -63
- udata/core/site/rdf.py +14 -14
- udata/core/spam/api.py +16 -9
- udata/core/spam/constants.py +4 -4
- udata/core/spam/fields.py +13 -7
- udata/core/spam/models.py +27 -20
- udata/core/spam/signals.py +1 -1
- udata/core/spam/tests/test_spam.py +6 -5
- udata/core/spatial/api.py +72 -80
- udata/core/spatial/api_fields.py +73 -58
- udata/core/spatial/commands.py +67 -64
- udata/core/spatial/constants.py +3 -3
- udata/core/spatial/factories.py +37 -54
- udata/core/spatial/forms.py +27 -26
- udata/core/spatial/geoids.py +17 -17
- udata/core/spatial/models.py +43 -47
- udata/core/spatial/tasks.py +2 -1
- udata/core/spatial/tests/test_api.py +115 -130
- udata/core/spatial/tests/test_fields.py +74 -77
- udata/core/spatial/tests/test_geoid.py +22 -22
- udata/core/spatial/tests/test_models.py +5 -7
- udata/core/spatial/translations.py +16 -16
- udata/core/storages/__init__.py +16 -18
- udata/core/storages/api.py +66 -64
- udata/core/storages/tasks.py +7 -7
- udata/core/storages/utils.py +15 -15
- udata/core/storages/views.py +5 -6
- udata/core/tags/api.py +17 -14
- udata/core/tags/csv.py +4 -4
- udata/core/tags/models.py +8 -5
- udata/core/tags/tasks.py +11 -13
- udata/core/tags/views.py +4 -4
- udata/core/topic/api.py +84 -73
- udata/core/topic/apiv2.py +157 -127
- udata/core/topic/factories.py +3 -4
- udata/core/topic/forms.py +12 -14
- udata/core/topic/models.py +14 -19
- udata/core/topic/parsers.py +26 -26
- udata/core/user/activities.py +30 -29
- udata/core/user/api.py +151 -152
- udata/core/user/api_fields.py +132 -100
- udata/core/user/apiv2.py +7 -7
- udata/core/user/commands.py +38 -38
- udata/core/user/factories.py +8 -9
- udata/core/user/forms.py +14 -11
- udata/core/user/metrics.py +2 -2
- udata/core/user/models.py +68 -69
- udata/core/user/permissions.py +4 -5
- udata/core/user/rdf.py +7 -8
- udata/core/user/tasks.py +2 -2
- udata/core/user/tests/test_user_model.py +24 -16
- udata/db/tasks.py +2 -1
- udata/entrypoints.py +35 -31
- udata/errors.py +2 -1
- udata/event/values.py +6 -6
- udata/factories.py +2 -2
- udata/features/identicon/api.py +5 -6
- udata/features/identicon/backends.py +48 -55
- udata/features/identicon/tests/test_backends.py +4 -5
- udata/features/notifications/__init__.py +0 -1
- udata/features/notifications/actions.py +9 -9
- udata/features/notifications/api.py +17 -13
- udata/features/territories/__init__.py +12 -10
- udata/features/territories/api.py +14 -15
- udata/features/territories/models.py +23 -28
- udata/features/transfer/actions.py +8 -11
- udata/features/transfer/api.py +84 -77
- udata/features/transfer/factories.py +2 -1
- udata/features/transfer/models.py +11 -12
- udata/features/transfer/notifications.py +19 -15
- udata/features/transfer/permissions.py +5 -5
- udata/forms/__init__.py +5 -2
- udata/forms/fields.py +164 -172
- udata/forms/validators.py +19 -22
- udata/forms/widgets.py +9 -13
- udata/frontend/__init__.py +31 -26
- udata/frontend/csv.py +68 -58
- udata/frontend/markdown.py +40 -44
- udata/harvest/actions.py +89 -77
- udata/harvest/api.py +294 -238
- udata/harvest/backends/__init__.py +4 -4
- udata/harvest/backends/base.py +128 -111
- udata/harvest/backends/dcat.py +80 -66
- udata/harvest/commands.py +56 -60
- udata/harvest/csv.py +8 -8
- udata/harvest/exceptions.py +6 -3
- udata/harvest/filters.py +24 -23
- udata/harvest/forms.py +27 -28
- udata/harvest/models.py +88 -80
- udata/harvest/notifications.py +15 -10
- udata/harvest/signals.py +13 -13
- udata/harvest/tasks.py +11 -10
- udata/harvest/tests/factories.py +23 -24
- udata/harvest/tests/test_actions.py +136 -166
- udata/harvest/tests/test_api.py +220 -214
- udata/harvest/tests/test_base_backend.py +117 -112
- udata/harvest/tests/test_dcat_backend.py +380 -308
- udata/harvest/tests/test_filters.py +33 -22
- udata/harvest/tests/test_models.py +11 -14
- udata/harvest/tests/test_notifications.py +6 -7
- udata/harvest/tests/test_tasks.py +7 -6
- udata/i18n.py +237 -78
- udata/linkchecker/backends.py +5 -11
- udata/linkchecker/checker.py +23 -22
- udata/linkchecker/commands.py +4 -6
- udata/linkchecker/models.py +6 -6
- udata/linkchecker/tasks.py +18 -20
- udata/mail.py +21 -21
- udata/migrations/2020-07-24-remove-s-from-scope-oauth.py +9 -8
- udata/migrations/2020-08-24-add-fs-filename.py +9 -8
- udata/migrations/2020-09-28-update-reuses-datasets-metrics.py +5 -4
- udata/migrations/2020-10-16-migrate-ods-resources.py +9 -10
- udata/migrations/2021-04-08-update-schema-with-new-structure.py +8 -7
- udata/migrations/2021-05-27-fix-default-schema-name.py +7 -6
- udata/migrations/2021-07-05-remove-unused-badges.py +17 -15
- udata/migrations/2021-07-07-update-schema-for-community-resources.py +7 -6
- udata/migrations/2021-08-17-follow-integrity.py +5 -4
- udata/migrations/2021-08-17-harvest-integrity.py +13 -12
- udata/migrations/2021-08-17-oauth2client-integrity.py +5 -4
- udata/migrations/2021-08-17-transfer-integrity.py +5 -4
- udata/migrations/2021-08-17-users-integrity.py +9 -8
- udata/migrations/2021-12-14-reuse-topics.py +7 -6
- udata/migrations/2022-04-21-improve-extension-detection.py +8 -7
- udata/migrations/2022-09-22-clean-inactive-harvest-datasets.py +16 -14
- udata/migrations/2022-10-10-add-fs_uniquifier-to-user-model.py +6 -6
- udata/migrations/2022-10-10-migrate-harvest-extras.py +36 -26
- udata/migrations/2023-02-08-rename-internal-dates.py +46 -28
- udata/migrations/2024-01-29-fix-reuse-and-dataset-with-private-None.py +10 -8
- udata/migrations/2024-03-22-migrate-activity-kwargs-to-extras.py +6 -4
- udata/migrations/2024-06-11-fix-reuse-datasets-references.py +7 -6
- udata/migrations/__init__.py +123 -105
- udata/models/__init__.py +4 -4
- udata/mongo/__init__.py +13 -11
- udata/mongo/badges_field.py +3 -2
- udata/mongo/datetime_fields.py +13 -12
- udata/mongo/document.py +17 -16
- udata/mongo/engine.py +15 -16
- udata/mongo/errors.py +2 -1
- udata/mongo/extras_fields.py +30 -20
- udata/mongo/queryset.py +12 -12
- udata/mongo/slug_fields.py +38 -28
- udata/mongo/taglist_field.py +1 -2
- udata/mongo/url_field.py +5 -5
- udata/mongo/uuid_fields.py +4 -3
- udata/notifications/__init__.py +1 -1
- udata/notifications/mattermost.py +10 -9
- udata/rdf.py +167 -188
- udata/routing.py +40 -45
- udata/search/__init__.py +18 -19
- udata/search/adapter.py +17 -16
- udata/search/commands.py +44 -51
- udata/search/fields.py +13 -20
- udata/search/query.py +23 -18
- udata/search/result.py +9 -10
- udata/sentry.py +21 -19
- udata/settings.py +262 -198
- udata/sitemap.py +8 -6
- udata/static/chunks/{11.e9b9ca1f3e03d4020377.js → 11.52e531c19f8de80c00cf.js} +3 -3
- udata/static/chunks/{11.e9b9ca1f3e03d4020377.js.map → 11.52e531c19f8de80c00cf.js.map} +1 -1
- udata/static/chunks/{13.038c0d9aa0dfa0181c4b.js → 13.c3343a7f1070061c0e10.js} +2 -2
- udata/static/chunks/{13.038c0d9aa0dfa0181c4b.js.map → 13.c3343a7f1070061c0e10.js.map} +1 -1
- udata/static/chunks/{16.0baa2b64a74a2dcde25c.js → 16.8fa42440ad75ca172e6d.js} +2 -2
- udata/static/chunks/{16.0baa2b64a74a2dcde25c.js.map → 16.8fa42440ad75ca172e6d.js.map} +1 -1
- udata/static/chunks/{19.350a9f150b074b4ecefa.js → 19.9c6c8412729cd6d59cfa.js} +3 -3
- udata/static/chunks/{19.350a9f150b074b4ecefa.js.map → 19.9c6c8412729cd6d59cfa.js.map} +1 -1
- udata/static/chunks/{5.6ebbce2b9b3e696d3da5.js → 5.71d15c2e4f21feee2a9a.js} +3 -3
- udata/static/chunks/{5.6ebbce2b9b3e696d3da5.js.map → 5.71d15c2e4f21feee2a9a.js.map} +1 -1
- udata/static/chunks/{6.d8a5f7b017bcbd083641.js → 6.9139dc098b8ea640b890.js} +3 -3
- udata/static/chunks/{6.d8a5f7b017bcbd083641.js.map → 6.9139dc098b8ea640b890.js.map} +1 -1
- udata/static/common.js +1 -1
- udata/static/common.js.map +1 -1
- udata/storage/s3.py +20 -13
- udata/tags.py +4 -5
- udata/tasks.py +43 -42
- udata/tests/__init__.py +9 -6
- udata/tests/api/__init__.py +5 -6
- udata/tests/api/test_auth_api.py +395 -321
- udata/tests/api/test_base_api.py +31 -33
- udata/tests/api/test_contact_points.py +7 -9
- udata/tests/api/test_dataservices_api.py +211 -158
- udata/tests/api/test_datasets_api.py +823 -812
- udata/tests/api/test_follow_api.py +13 -15
- udata/tests/api/test_me_api.py +95 -112
- udata/tests/api/test_organizations_api.py +301 -339
- udata/tests/api/test_reports_api.py +35 -25
- udata/tests/api/test_reuses_api.py +134 -139
- udata/tests/api/test_swagger.py +5 -5
- udata/tests/api/test_tags_api.py +18 -25
- udata/tests/api/test_topics_api.py +94 -94
- udata/tests/api/test_transfer_api.py +53 -48
- udata/tests/api/test_user_api.py +128 -141
- udata/tests/apiv2/test_datasets.py +290 -198
- udata/tests/apiv2/test_me_api.py +10 -11
- udata/tests/apiv2/test_organizations.py +56 -74
- udata/tests/apiv2/test_swagger.py +5 -5
- udata/tests/apiv2/test_topics.py +69 -87
- udata/tests/cli/test_cli_base.py +8 -8
- udata/tests/cli/test_db_cli.py +21 -19
- udata/tests/dataservice/test_dataservice_tasks.py +8 -12
- udata/tests/dataset/test_csv_adapter.py +44 -35
- udata/tests/dataset/test_dataset_actions.py +2 -3
- udata/tests/dataset/test_dataset_commands.py +7 -8
- udata/tests/dataset/test_dataset_events.py +36 -29
- udata/tests/dataset/test_dataset_model.py +224 -217
- udata/tests/dataset/test_dataset_rdf.py +142 -131
- udata/tests/dataset/test_dataset_tasks.py +15 -15
- udata/tests/dataset/test_resource_preview.py +10 -13
- udata/tests/features/territories/__init__.py +9 -13
- udata/tests/features/territories/test_territories_api.py +71 -91
- udata/tests/forms/test_basic_fields.py +7 -7
- udata/tests/forms/test_current_user_field.py +39 -66
- udata/tests/forms/test_daterange_field.py +31 -39
- udata/tests/forms/test_dict_field.py +28 -26
- udata/tests/forms/test_extras_fields.py +102 -76
- udata/tests/forms/test_form_field.py +8 -8
- udata/tests/forms/test_image_field.py +33 -26
- udata/tests/forms/test_model_field.py +134 -123
- udata/tests/forms/test_model_list_field.py +7 -7
- udata/tests/forms/test_nested_model_list_field.py +117 -79
- udata/tests/forms/test_publish_as_field.py +36 -65
- udata/tests/forms/test_reference_field.py +34 -53
- udata/tests/forms/test_user_forms.py +23 -21
- udata/tests/forms/test_uuid_field.py +6 -10
- udata/tests/frontend/__init__.py +9 -6
- udata/tests/frontend/test_auth.py +7 -6
- udata/tests/frontend/test_csv.py +81 -96
- udata/tests/frontend/test_hooks.py +43 -43
- udata/tests/frontend/test_markdown.py +211 -191
- udata/tests/helpers.py +32 -37
- udata/tests/models.py +2 -2
- udata/tests/organization/test_csv_adapter.py +21 -16
- udata/tests/organization/test_notifications.py +11 -18
- udata/tests/organization/test_organization_model.py +13 -13
- udata/tests/organization/test_organization_rdf.py +29 -22
- udata/tests/organization/test_organization_tasks.py +16 -17
- udata/tests/plugin.py +76 -73
- udata/tests/reuse/test_reuse_model.py +21 -21
- udata/tests/reuse/test_reuse_task.py +11 -13
- udata/tests/search/__init__.py +11 -12
- udata/tests/search/test_adapter.py +60 -70
- udata/tests/search/test_query.py +16 -16
- udata/tests/search/test_results.py +10 -7
- udata/tests/site/test_site_api.py +11 -16
- udata/tests/site/test_site_metrics.py +20 -30
- udata/tests/site/test_site_model.py +4 -5
- udata/tests/site/test_site_rdf.py +94 -78
- udata/tests/test_activity.py +17 -17
- udata/tests/test_discussions.py +292 -299
- udata/tests/test_i18n.py +37 -40
- udata/tests/test_linkchecker.py +91 -85
- udata/tests/test_mail.py +13 -17
- udata/tests/test_migrations.py +219 -180
- udata/tests/test_model.py +164 -157
- udata/tests/test_notifications.py +17 -17
- udata/tests/test_owned.py +14 -14
- udata/tests/test_rdf.py +25 -23
- udata/tests/test_routing.py +89 -93
- udata/tests/test_storages.py +137 -128
- udata/tests/test_tags.py +44 -46
- udata/tests/test_topics.py +7 -7
- udata/tests/test_transfer.py +42 -49
- udata/tests/test_uris.py +160 -161
- udata/tests/test_utils.py +79 -71
- udata/tests/user/test_user_rdf.py +5 -9
- udata/tests/workers/test_jobs_commands.py +57 -58
- udata/tests/workers/test_tasks_routing.py +23 -29
- udata/tests/workers/test_workers_api.py +125 -131
- udata/tests/workers/test_workers_helpers.py +6 -6
- udata/tracking.py +4 -6
- udata/uris.py +45 -46
- udata/utils.py +68 -66
- udata/wsgi.py +1 -1
- {udata-9.1.2.dev30355.dist-info → udata-9.1.2.dev30382.dist-info}/METADATA +3 -2
- udata-9.1.2.dev30382.dist-info/RECORD +704 -0
- udata-9.1.2.dev30355.dist-info/RECORD +0 -704
- {udata-9.1.2.dev30355.dist-info → udata-9.1.2.dev30382.dist-info}/LICENSE +0 -0
- {udata-9.1.2.dev30355.dist-info → udata-9.1.2.dev30382.dist-info}/WHEEL +0 -0
- {udata-9.1.2.dev30355.dist-info → udata-9.1.2.dev30382.dist-info}/entry_points.txt +0 -0
- {udata-9.1.2.dev30355.dist-info → udata-9.1.2.dev30382.dist-info}/top_level.txt +0 -0
udata/core/dataset/models.py
CHANGED
|
@@ -1,50 +1,70 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
|
|
3
2
|
from datetime import datetime, timedelta
|
|
3
|
+
from pydoc import locate
|
|
4
4
|
from urllib.parse import urlparse
|
|
5
5
|
|
|
6
|
+
import requests
|
|
6
7
|
from blinker import signal
|
|
7
8
|
from dateutil.parser import parse as parse_dt
|
|
8
9
|
from flask import current_app
|
|
9
|
-
from mongoengine import DynamicEmbeddedDocument
|
|
10
|
-
from mongoengine
|
|
10
|
+
from mongoengine import DynamicEmbeddedDocument
|
|
11
|
+
from mongoengine import ValidationError as MongoEngineValidationError
|
|
11
12
|
from mongoengine.fields import DateTimeField
|
|
12
|
-
from
|
|
13
|
+
from mongoengine.signals import post_save, pre_save
|
|
13
14
|
from stringdist import rdlevenshtein
|
|
14
15
|
from werkzeug.utils import cached_property
|
|
15
|
-
import requests
|
|
16
16
|
|
|
17
17
|
from udata.app import cache
|
|
18
18
|
from udata.core import storages
|
|
19
|
+
from udata.core.owned import Owned, OwnedQuerySet
|
|
19
20
|
from udata.frontend.markdown import mdstrip
|
|
20
|
-
from udata.models import db, WithMetrics, BadgeMixin, SpatialCoverage
|
|
21
|
-
from udata.mongo.errors import FieldValidationError
|
|
22
21
|
from udata.i18n import lazy_gettext as _
|
|
23
|
-
from udata.
|
|
22
|
+
from udata.models import BadgeMixin, SpatialCoverage, WithMetrics, db
|
|
23
|
+
from udata.mongo.errors import FieldValidationError
|
|
24
24
|
from udata.uris import ValidationError, endpoint_for
|
|
25
25
|
from udata.uris import validate as validate_url
|
|
26
|
-
from udata.
|
|
27
|
-
from .constants import CHECKSUM_TYPES, CLOSED_FORMATS, DEFAULT_LICENSE, LEGACY_FREQUENCIES, MAX_DISTANCE, PIVOTAL_DATA, RESOURCE_FILETYPES, RESOURCE_TYPES, SCHEMA_CACHE_DURATION, UPDATE_FREQUENCIES
|
|
26
|
+
from udata.utils import get_by, hash_url, to_naive_datetime
|
|
28
27
|
|
|
29
|
-
from .
|
|
28
|
+
from .constants import (
|
|
29
|
+
CHECKSUM_TYPES,
|
|
30
|
+
CLOSED_FORMATS,
|
|
31
|
+
DEFAULT_LICENSE,
|
|
32
|
+
LEGACY_FREQUENCIES,
|
|
33
|
+
MAX_DISTANCE,
|
|
34
|
+
PIVOTAL_DATA,
|
|
35
|
+
RESOURCE_FILETYPES,
|
|
36
|
+
RESOURCE_TYPES,
|
|
37
|
+
SCHEMA_CACHE_DURATION,
|
|
38
|
+
UPDATE_FREQUENCIES,
|
|
39
|
+
)
|
|
30
40
|
from .exceptions import (
|
|
31
|
-
|
|
41
|
+
SchemasCacheUnavailableException,
|
|
42
|
+
SchemasCatalogNotFoundException,
|
|
32
43
|
)
|
|
44
|
+
from .preview import get_preview_url
|
|
33
45
|
|
|
34
|
-
__all__ = (
|
|
46
|
+
__all__ = (
|
|
47
|
+
"License",
|
|
48
|
+
"Resource",
|
|
49
|
+
"Schema",
|
|
50
|
+
"Dataset",
|
|
51
|
+
"Checksum",
|
|
52
|
+
"CommunityResource",
|
|
53
|
+
"ResourceSchema",
|
|
54
|
+
)
|
|
35
55
|
|
|
36
|
-
NON_ASSIGNABLE_SCHEMA_TYPES = [
|
|
56
|
+
NON_ASSIGNABLE_SCHEMA_TYPES = ["datapackage"]
|
|
37
57
|
|
|
38
58
|
log = logging.getLogger(__name__)
|
|
39
59
|
|
|
40
60
|
|
|
41
61
|
def get_json_ld_extra(key, value):
|
|
42
|
-
|
|
43
|
-
value = value.serialize() if hasattr(value,
|
|
62
|
+
"""Serialize an extras key, value pair into JSON-LD"""
|
|
63
|
+
value = value.serialize() if hasattr(value, "serialize") else value
|
|
44
64
|
return {
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
65
|
+
"@type": "http://schema.org/PropertyValue",
|
|
66
|
+
"name": key,
|
|
67
|
+
"value": value,
|
|
48
68
|
}
|
|
49
69
|
|
|
50
70
|
|
|
@@ -75,6 +95,7 @@ class Schema(db.EmbeddedDocument):
|
|
|
75
95
|
- Known schema: url is not set, name is set, version is maybe set
|
|
76
96
|
- Unknown schema: url is set, name and version are maybe set
|
|
77
97
|
"""
|
|
98
|
+
|
|
78
99
|
url = db.URLField()
|
|
79
100
|
name = db.StringField()
|
|
80
101
|
version = db.StringField()
|
|
@@ -91,20 +112,23 @@ class Schema(db.EmbeddedDocument):
|
|
|
91
112
|
|
|
92
113
|
def to_dict(self):
|
|
93
114
|
return {
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
115
|
+
"url": self.url,
|
|
116
|
+
"name": self.name,
|
|
117
|
+
"version": self.version,
|
|
97
118
|
}
|
|
98
119
|
|
|
99
120
|
def clean(self, **kwargs):
|
|
100
121
|
super().clean()
|
|
101
122
|
|
|
102
|
-
check_schema_in_catalog = kwargs.get(
|
|
123
|
+
check_schema_in_catalog = kwargs.get("check_schema_in_catalog", False)
|
|
103
124
|
|
|
104
125
|
if not self.url and not self.name:
|
|
105
126
|
# There is no schema.
|
|
106
127
|
if self.version:
|
|
107
|
-
raise FieldValidationError(
|
|
128
|
+
raise FieldValidationError(
|
|
129
|
+
_("A schema must contains a name or an URL when a version is provided."),
|
|
130
|
+
field="version",
|
|
131
|
+
)
|
|
108
132
|
|
|
109
133
|
return
|
|
110
134
|
|
|
@@ -115,11 +139,11 @@ class Schema(db.EmbeddedDocument):
|
|
|
115
139
|
self.url = None
|
|
116
140
|
self.name = info[0]
|
|
117
141
|
self.version = info[1]
|
|
118
|
-
|
|
142
|
+
|
|
119
143
|
# Nothing more to do since an URL can point to anywhere and have a random name/version
|
|
120
144
|
return
|
|
121
145
|
|
|
122
|
-
# All the following checks are only run if there is
|
|
146
|
+
# All the following checks are only run if there is
|
|
123
147
|
# some schemas in the catalog. If there is no catalog
|
|
124
148
|
# or no schema in the catalog we do not check the validity
|
|
125
149
|
# of the name and version
|
|
@@ -128,31 +152,35 @@ class Schema(db.EmbeddedDocument):
|
|
|
128
152
|
return
|
|
129
153
|
|
|
130
154
|
# We know this schema so we can do some checks
|
|
131
|
-
existing_schema = next(
|
|
155
|
+
existing_schema = next(
|
|
156
|
+
(schema for schema in catalog_schemas if schema["name"] == self.name), None
|
|
157
|
+
)
|
|
132
158
|
|
|
133
159
|
if not existing_schema:
|
|
134
|
-
message = _(
|
|
160
|
+
message = _(
|
|
161
|
+
'Schema name "{schema}" is not an allowed value. Allowed values: {values}'
|
|
162
|
+
).format(
|
|
135
163
|
schema=self.name,
|
|
136
|
-
values=
|
|
164
|
+
values=", ".join(map(lambda schema: schema["name"], catalog_schemas)),
|
|
137
165
|
)
|
|
138
166
|
if check_schema_in_catalog:
|
|
139
|
-
raise FieldValidationError(message, field=
|
|
167
|
+
raise FieldValidationError(message, field="name")
|
|
140
168
|
else:
|
|
141
169
|
log.warning(message)
|
|
142
170
|
return
|
|
143
171
|
|
|
144
172
|
if self.version:
|
|
145
|
-
allowed_versions = list(
|
|
146
|
-
|
|
173
|
+
allowed_versions = list(
|
|
174
|
+
map(lambda version: version["version_name"], existing_schema["versions"])
|
|
175
|
+
)
|
|
176
|
+
allowed_versions.append("latest")
|
|
147
177
|
|
|
148
178
|
if self.version not in allowed_versions:
|
|
149
|
-
message = _(
|
|
150
|
-
version
|
|
151
|
-
|
|
152
|
-
values=', '.join(allowed_versions)
|
|
153
|
-
)
|
|
179
|
+
message = _(
|
|
180
|
+
'Version "{version}" is not an allowed value for the schema "{name}". Allowed versions: {values}'
|
|
181
|
+
).format(version=self.version, name=self.name, values=", ".join(allowed_versions))
|
|
154
182
|
if check_schema_in_catalog:
|
|
155
|
-
raise FieldValidationError(message, field=
|
|
183
|
+
raise FieldValidationError(message, field="version")
|
|
156
184
|
else:
|
|
157
185
|
log.warning(message)
|
|
158
186
|
return
|
|
@@ -165,7 +193,7 @@ class License(db.Document):
|
|
|
165
193
|
created_at = db.DateTimeField(default=datetime.utcnow, required=True)
|
|
166
194
|
title = db.StringField(required=True)
|
|
167
195
|
alternate_titles = db.ListField(db.StringField())
|
|
168
|
-
slug = db.SlugField(required=True, populate_from=
|
|
196
|
+
slug = db.SlugField(required=True, populate_from="title")
|
|
169
197
|
url = db.URLField()
|
|
170
198
|
alternate_urls = db.ListField(db.URLField())
|
|
171
199
|
maintainer = db.StringField()
|
|
@@ -178,34 +206,36 @@ class License(db.Document):
|
|
|
178
206
|
|
|
179
207
|
@classmethod
|
|
180
208
|
def guess(cls, *strings, **kwargs):
|
|
181
|
-
|
|
209
|
+
"""
|
|
182
210
|
Try to guess a license from a list of strings.
|
|
183
211
|
|
|
184
212
|
Accept a `default` keyword argument which will be
|
|
185
213
|
the default fallback license.
|
|
186
|
-
|
|
214
|
+
"""
|
|
187
215
|
license = None
|
|
188
216
|
for string in strings:
|
|
189
217
|
license = cls.guess_one(string)
|
|
190
218
|
if license:
|
|
191
219
|
break
|
|
192
|
-
return license or kwargs.get(
|
|
220
|
+
return license or kwargs.get("default")
|
|
193
221
|
|
|
194
222
|
@classmethod
|
|
195
223
|
def guess_one(cls, text):
|
|
196
|
-
|
|
224
|
+
"""
|
|
197
225
|
Try to guess license from a string.
|
|
198
226
|
|
|
199
227
|
Try to exact match on identifier then slugified title
|
|
200
228
|
and fallback on edit distance ranking (after slugification)
|
|
201
|
-
|
|
229
|
+
"""
|
|
202
230
|
if not text:
|
|
203
231
|
return
|
|
204
232
|
qs = cls.objects
|
|
205
233
|
text = text.strip().lower() # Stored identifiers are lower case
|
|
206
234
|
slug = cls.slug.slugify(text) # Use slug as it normalize string
|
|
207
235
|
license = qs(
|
|
208
|
-
db.Q(id__iexact=text)
|
|
236
|
+
db.Q(id__iexact=text)
|
|
237
|
+
| db.Q(slug=slug)
|
|
238
|
+
| db.Q(url__iexact=text)
|
|
209
239
|
| db.Q(alternate_urls__iexact=text)
|
|
210
240
|
).first()
|
|
211
241
|
|
|
@@ -218,9 +248,11 @@ class License(db.Document):
|
|
|
218
248
|
pass
|
|
219
249
|
else:
|
|
220
250
|
parsed = urlparse(url)
|
|
221
|
-
path = parsed.path.rstrip(
|
|
222
|
-
query = f
|
|
223
|
-
license = qs(
|
|
251
|
+
path = parsed.path.rstrip("/")
|
|
252
|
+
query = f"{parsed.netloc}{path}"
|
|
253
|
+
license = qs(
|
|
254
|
+
db.Q(url__icontains=query) | db.Q(alternate_urls__contains=query)
|
|
255
|
+
).first()
|
|
224
256
|
|
|
225
257
|
if license is None:
|
|
226
258
|
# Try to single match `slug` with a low Damerau-Levenshtein distance
|
|
@@ -256,7 +288,7 @@ class License(db.Document):
|
|
|
256
288
|
|
|
257
289
|
@classmethod
|
|
258
290
|
def default(cls):
|
|
259
|
-
return cls.objects(id=DEFAULT_LICENSE[
|
|
291
|
+
return cls.objects(id=DEFAULT_LICENSE["id"]).first()
|
|
260
292
|
|
|
261
293
|
|
|
262
294
|
class DatasetQuerySet(OwnedQuerySet):
|
|
@@ -264,9 +296,7 @@ class DatasetQuerySet(OwnedQuerySet):
|
|
|
264
296
|
return self(private__ne=True, deleted=None, archived=None)
|
|
265
297
|
|
|
266
298
|
def hidden(self):
|
|
267
|
-
return self(db.Q(private=True) |
|
|
268
|
-
db.Q(deleted__ne=None) |
|
|
269
|
-
db.Q(archived__ne=None))
|
|
299
|
+
return self(db.Q(private=True) | db.Q(deleted__ne=None) | db.Q(archived__ne=None))
|
|
270
300
|
|
|
271
301
|
|
|
272
302
|
class Checksum(db.EmbeddedDocument):
|
|
@@ -282,10 +312,8 @@ class ResourceMixin(object):
|
|
|
282
312
|
id = db.AutoUUIDField(primary_key=True)
|
|
283
313
|
title = db.StringField(verbose_name="Title", required=True)
|
|
284
314
|
description = db.StringField()
|
|
285
|
-
filetype = db.StringField(
|
|
286
|
-
|
|
287
|
-
type = db.StringField(
|
|
288
|
-
choices=list(RESOURCE_TYPES), default='main', required=True)
|
|
315
|
+
filetype = db.StringField(choices=list(RESOURCE_FILETYPES), default="file", required=True)
|
|
316
|
+
type = db.StringField(choices=list(RESOURCE_TYPES), default="main", required=True)
|
|
289
317
|
url = db.URLField(required=True)
|
|
290
318
|
urlhash = db.StringField()
|
|
291
319
|
checksum = db.EmbeddedDocumentField(Checksum)
|
|
@@ -296,7 +324,7 @@ class ResourceMixin(object):
|
|
|
296
324
|
extras = db.ExtrasField()
|
|
297
325
|
harvest = db.EmbeddedDocumentField(HarvestResourceMetadata)
|
|
298
326
|
schema = db.EmbeddedDocumentField(Schema)
|
|
299
|
-
|
|
327
|
+
|
|
300
328
|
created_at_internal = db.DateTimeField(default=datetime.utcnow, required=True)
|
|
301
329
|
last_modified_internal = db.DateTimeField(default=datetime.utcnow, required=True)
|
|
302
330
|
deleted = db.DateTimeField()
|
|
@@ -304,25 +332,33 @@ class ResourceMixin(object):
|
|
|
304
332
|
@property
|
|
305
333
|
def internal(self):
|
|
306
334
|
return {
|
|
307
|
-
|
|
308
|
-
|
|
335
|
+
"created_at_internal": self.created_at_internal,
|
|
336
|
+
"last_modified_internal": self.last_modified_internal,
|
|
309
337
|
}
|
|
310
338
|
|
|
311
339
|
@property
|
|
312
340
|
def created_at(self):
|
|
313
|
-
return
|
|
341
|
+
return (
|
|
342
|
+
self.harvest.created_at
|
|
343
|
+
if self.harvest and self.harvest.created_at
|
|
344
|
+
else self.created_at_internal
|
|
345
|
+
)
|
|
314
346
|
|
|
315
347
|
@property
|
|
316
348
|
def last_modified(self):
|
|
317
|
-
if
|
|
349
|
+
if (
|
|
350
|
+
self.harvest
|
|
351
|
+
and self.harvest.modified_at
|
|
352
|
+
and to_naive_datetime(self.harvest.modified_at) < datetime.utcnow()
|
|
353
|
+
):
|
|
318
354
|
return to_naive_datetime(self.harvest.modified_at)
|
|
319
|
-
if self.filetype ==
|
|
320
|
-
return to_naive_datetime(self.extras.get(
|
|
355
|
+
if self.filetype == "remote" and self.extras.get("analysis:last-modified-at"):
|
|
356
|
+
return to_naive_datetime(self.extras.get("analysis:last-modified-at"))
|
|
321
357
|
return self.last_modified_internal
|
|
322
358
|
|
|
323
359
|
def clean(self):
|
|
324
360
|
super(ResourceMixin, self).clean()
|
|
325
|
-
if not self.urlhash or
|
|
361
|
+
if not self.urlhash or "url" in self._get_changed_fields():
|
|
326
362
|
self.urlhash = hash_url(self.url)
|
|
327
363
|
|
|
328
364
|
@cached_property # Accessed at least 2 times in front rendering
|
|
@@ -338,40 +374,40 @@ class ResourceMixin(object):
|
|
|
338
374
|
return not self.format or self.format.lower() in CLOSED_FORMATS
|
|
339
375
|
|
|
340
376
|
def check_availability(self):
    """
    Return the check status stored in extras, or ``"unknown"`` if none.

    NB: `unknown` will evaluate to True in the aggregate checks using
    `all([])` (dataset, organization, user).
    """
    status = self.extras.get("check:available", "unknown")
    return status
|
|
348
384
|
|
|
349
385
|
def need_check(self):
|
|
350
|
-
|
|
386
|
+
"""Does the resource needs to be checked against its linkchecker?
|
|
351
387
|
|
|
352
388
|
We check unavailable resources often, unless they go over the
|
|
353
389
|
threshold. Available resources are checked less and less frequently
|
|
354
390
|
based on their historical availability.
|
|
355
|
-
|
|
391
|
+
"""
|
|
356
392
|
min_cache_duration, max_cache_duration, ko_threshold = [
|
|
357
|
-
current_app.config.get(k)
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
393
|
+
current_app.config.get(k)
|
|
394
|
+
for k in (
|
|
395
|
+
"LINKCHECKING_MIN_CACHE_DURATION",
|
|
396
|
+
"LINKCHECKING_MAX_CACHE_DURATION",
|
|
397
|
+
"LINKCHECKING_UNAVAILABLE_THRESHOLD",
|
|
361
398
|
)
|
|
362
399
|
]
|
|
363
|
-
count_availability = self.extras.get(
|
|
400
|
+
count_availability = self.extras.get("check:count-availability", 1)
|
|
364
401
|
is_available = self.check_availability()
|
|
365
|
-
if is_available ==
|
|
402
|
+
if is_available == "unknown":
|
|
366
403
|
return True
|
|
367
404
|
elif is_available or count_availability > ko_threshold:
|
|
368
|
-
delta = min(min_cache_duration * count_availability,
|
|
369
|
-
max_cache_duration)
|
|
405
|
+
delta = min(min_cache_duration * count_availability, max_cache_duration)
|
|
370
406
|
else:
|
|
371
407
|
delta = min_cache_duration
|
|
372
|
-
if self.extras.get(
|
|
408
|
+
if self.extras.get("check:date"):
|
|
373
409
|
limit_date = datetime.utcnow() - timedelta(minutes=delta)
|
|
374
|
-
check_date = self.extras[
|
|
410
|
+
check_date = self.extras["check:date"]
|
|
375
411
|
if not isinstance(check_date, datetime):
|
|
376
412
|
try:
|
|
377
413
|
check_date = parse_dt(check_date)
|
|
@@ -383,62 +419,63 @@ class ResourceMixin(object):
|
|
|
383
419
|
|
|
384
420
|
@property
def latest(self):
    """
    Permanent link to the latest version of this resource.

    If this resource is updated and `url` changes, this property won't.
    """
    route_kwargs = {"id": self.id, "_external": True}
    return endpoint_for("datasets.resource", "api.resource_redirect", **route_kwargs)
|
|
392
430
|
|
|
393
431
|
@cached_property
def json_ld(self):
    """Build the JSON-LD mapping (``DataDownload``) describing this resource.

    Mandatory keys are always emitted. The interaction counter, format,
    size, MIME type and description are added only when the matching
    attribute is present or truthy.
    """
    document = {
        "@type": "DataDownload",
        "@id": str(self.id),
        "url": self.latest,
        "name": self.title or _("Nameless resource"),
        "contentUrl": self.url,
        "dateCreated": self.created_at.isoformat(),
        "dateModified": self.last_modified.isoformat(),
        "extras": [get_json_ld_extra(*pair) for pair in self.extras.items()],
    }

    metrics = self.metrics
    if "views" in metrics:
        document["interactionStatistic"] = {
            "@type": "InteractionCounter",
            "interactionType": {
                "@type": "DownloadAction",
            },
            "userInteractionCount": metrics["views"],
        }

    # Optional plain attributes, emitted in this order when truthy.
    optional_fields = (
        ("encodingFormat", self.format),
        ("contentSize", self.filesize),
        ("fileFormat", self.mime),
    )
    for key, value in optional_fields:
        if value:
            document[key] = value

    if self.description:
        document["description"] = mdstrip(self.description)

    return document
|
|
430
466
|
|
|
431
467
|
|
|
432
468
|
class Resource(ResourceMixin, WithMetrics, db.EmbeddedDocument):
|
|
433
|
-
|
|
469
|
+
"""
|
|
434
470
|
Local file, remote file or API provided by the original provider of the
|
|
435
471
|
dataset
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
472
|
+
"""
|
|
473
|
+
|
|
474
|
+
on_added = signal("Resource.on_added")
|
|
475
|
+
on_deleted = signal("Resource.on_deleted")
|
|
439
476
|
|
|
440
477
|
__metrics_keys__ = [
|
|
441
|
-
|
|
478
|
+
"views",
|
|
442
479
|
]
|
|
443
480
|
|
|
444
481
|
@property
|
|
@@ -447,13 +484,15 @@ class Resource(ResourceMixin, WithMetrics, db.EmbeddedDocument):
|
|
|
447
484
|
self._instance.id # try to access attr from parent instance
|
|
448
485
|
return self._instance
|
|
449
486
|
except ReferenceError: # weakly-referenced object no longer exists
|
|
450
|
-
log.warning(
|
|
451
|
-
|
|
487
|
+
log.warning(
|
|
488
|
+
"Weakly referenced object for resource.dataset no longer exists, "
|
|
489
|
+
"using a poor performance query instead."
|
|
490
|
+
)
|
|
452
491
|
return Dataset.objects(resources__id=self.id).first()
|
|
453
492
|
|
|
454
493
|
def save(self, *args, **kwargs):
    """Persist this embedded resource by saving its parent dataset.

    All positional and keyword arguments are forwarded to the parent's
    ``save``.

    Raises:
        RuntimeError: when no parent dataset can be resolved.
    """
    # Resolve the parent once: when the weak reference to the parent is
    # dead, every access to ``self.dataset`` issues a query and logs a
    # warning, so reading it twice doubles that cost.
    parent = self.dataset
    if not parent:
        raise RuntimeError("Impossible to save an orphan resource")
    parent.save(*args, **kwargs)
|
|
458
497
|
|
|
459
498
|
|
|
@@ -462,17 +501,18 @@ class Dataset(WithMetrics, BadgeMixin, Owned, db.Document):
|
|
|
462
501
|
acronym = db.StringField(max_length=128)
|
|
463
502
|
# /!\ do not set directly the slug when creating or updating a dataset
|
|
464
503
|
# this will break the search indexation
|
|
465
|
-
slug = db.SlugField(
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
504
|
+
slug = db.SlugField(
|
|
505
|
+
max_length=255, required=True, populate_from="title", update=True, follow=True
|
|
506
|
+
)
|
|
507
|
+
description = db.StringField(required=True, default="")
|
|
508
|
+
license = db.ReferenceField("License")
|
|
469
509
|
|
|
470
510
|
tags = db.TagListField()
|
|
471
511
|
resources = db.ListField(db.EmbeddedDocumentField(Resource))
|
|
472
512
|
|
|
473
513
|
private = db.BooleanField(default=False)
|
|
474
514
|
frequency = db.StringField(choices=list(UPDATE_FREQUENCIES.keys()))
|
|
475
|
-
frequency_date = db.DateTimeField(verbose_name=_(
|
|
515
|
+
frequency_date = db.DateTimeField(verbose_name=_("Future date of update"))
|
|
476
516
|
temporal_coverage = db.EmbeddedDocumentField(db.DateRange)
|
|
477
517
|
spatial = db.EmbeddedDocumentField(SpatialCoverage)
|
|
478
518
|
schema = db.EmbeddedDocumentField(Schema)
|
|
@@ -483,60 +523,63 @@ class Dataset(WithMetrics, BadgeMixin, Owned, db.Document):
|
|
|
483
523
|
|
|
484
524
|
featured = db.BooleanField(required=True, default=False)
|
|
485
525
|
|
|
486
|
-
contact_point = db.ReferenceField(
|
|
526
|
+
contact_point = db.ReferenceField("ContactPoint", reverse_delete_rule=db.NULLIFY)
|
|
487
527
|
|
|
488
|
-
created_at_internal = DateTimeField(
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
528
|
+
created_at_internal = DateTimeField(
|
|
529
|
+
verbose_name=_("Creation date"), default=datetime.utcnow, required=True
|
|
530
|
+
)
|
|
531
|
+
last_modified_internal = DateTimeField(
|
|
532
|
+
verbose_name=_("Last modification date"), default=datetime.utcnow, required=True
|
|
533
|
+
)
|
|
492
534
|
deleted = db.DateTimeField()
|
|
493
535
|
archived = db.DateTimeField()
|
|
494
536
|
|
|
495
537
|
def __str__(self):
    """Return the dataset title, or an empty string when it is falsy."""
    title = self.title
    return title if title else ""
|
|
497
539
|
|
|
498
540
|
__badges__ = {
|
|
499
|
-
PIVOTAL_DATA: _(
|
|
541
|
+
PIVOTAL_DATA: _("Pivotal data"),
|
|
500
542
|
}
|
|
501
543
|
|
|
502
544
|
__metrics_keys__ = [
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
545
|
+
"discussions",
|
|
546
|
+
"reuses",
|
|
547
|
+
"followers",
|
|
548
|
+
"views",
|
|
549
|
+
"resources_downloads",
|
|
508
550
|
]
|
|
509
551
|
|
|
510
552
|
meta = {
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
]
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
553
|
+
"indexes": [
|
|
554
|
+
"$title",
|
|
555
|
+
"created_at_internal",
|
|
556
|
+
"last_modified_internal",
|
|
557
|
+
"metrics.reuses",
|
|
558
|
+
"metrics.followers",
|
|
559
|
+
"metrics.views",
|
|
560
|
+
"slug",
|
|
561
|
+
"resources.id",
|
|
562
|
+
"resources.urlhash",
|
|
563
|
+
]
|
|
564
|
+
+ Owned.meta["indexes"],
|
|
565
|
+
"ordering": ["-created_at_internal"],
|
|
566
|
+
"queryset_class": DatasetQuerySet,
|
|
567
|
+
"auto_create_index_on_save": True,
|
|
525
568
|
}
|
|
526
569
|
|
|
527
|
-
before_save = signal(
|
|
528
|
-
after_save = signal(
|
|
529
|
-
on_create = signal(
|
|
530
|
-
on_update = signal(
|
|
531
|
-
before_delete = signal(
|
|
532
|
-
after_delete = signal(
|
|
533
|
-
on_delete = signal(
|
|
534
|
-
on_archive = signal(
|
|
535
|
-
on_resource_added = signal(
|
|
536
|
-
on_resource_updated = signal(
|
|
537
|
-
on_resource_removed = signal(
|
|
570
|
+
before_save = signal("Dataset.before_save")
|
|
571
|
+
after_save = signal("Dataset.after_save")
|
|
572
|
+
on_create = signal("Dataset.on_create")
|
|
573
|
+
on_update = signal("Dataset.on_update")
|
|
574
|
+
before_delete = signal("Dataset.before_delete")
|
|
575
|
+
after_delete = signal("Dataset.after_delete")
|
|
576
|
+
on_delete = signal("Dataset.on_delete")
|
|
577
|
+
on_archive = signal("Dataset.on_archive")
|
|
578
|
+
on_resource_added = signal("Dataset.on_resource_added")
|
|
579
|
+
on_resource_updated = signal("Dataset.on_resource_updated")
|
|
580
|
+
on_resource_removed = signal("Dataset.on_resource_removed")
|
|
538
581
|
|
|
539
|
-
verbose_name = _(
|
|
582
|
+
verbose_name = _("dataset")
|
|
540
583
|
|
|
541
584
|
@classmethod
|
|
542
585
|
def pre_save(cls, sender, document, **kwargs):
|
|
@@ -544,10 +587,10 @@ class Dataset(WithMetrics, BadgeMixin, Owned, db.Document):
|
|
|
544
587
|
|
|
545
588
|
@classmethod
|
|
546
589
|
def post_save(cls, sender, document, **kwargs):
|
|
547
|
-
if
|
|
590
|
+
if "post_save" in kwargs.get("ignores", []):
|
|
548
591
|
return
|
|
549
592
|
cls.after_save.send(document)
|
|
550
|
-
if kwargs.get(
|
|
593
|
+
if kwargs.get("created"):
|
|
551
594
|
cls.on_create.send(document)
|
|
552
595
|
else:
|
|
553
596
|
cls.on_update.send(document)
|
|
@@ -560,32 +603,36 @@ class Dataset(WithMetrics, BadgeMixin, Owned, db.Document):
|
|
|
560
603
|
self.frequency = LEGACY_FREQUENCIES[self.frequency]
|
|
561
604
|
|
|
562
605
|
for key, value in self.extras.items():
|
|
563
|
-
if not key.startswith(
|
|
606
|
+
if not key.startswith("custom:"):
|
|
564
607
|
continue
|
|
565
608
|
if not self.organization:
|
|
566
609
|
raise MongoEngineValidationError(
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
610
|
+
"Custom metadatas are only accessible to dataset owned by on organization."
|
|
611
|
+
)
|
|
612
|
+
custom_meta = key.split(":")[1]
|
|
613
|
+
org_custom = self.organization.extras.get("custom", [])
|
|
570
614
|
custom_present = False
|
|
571
615
|
for custom in org_custom:
|
|
572
|
-
if custom[
|
|
616
|
+
if custom["title"] != custom_meta:
|
|
573
617
|
continue
|
|
574
618
|
custom_present = True
|
|
575
|
-
if custom[
|
|
576
|
-
if value not in custom[
|
|
619
|
+
if custom["type"] == "choice":
|
|
620
|
+
if value not in custom["choices"]:
|
|
577
621
|
raise MongoEngineValidationError(
|
|
578
|
-
|
|
622
|
+
"Custom metadata choice is not defined by organization."
|
|
623
|
+
)
|
|
579
624
|
else:
|
|
580
|
-
if not isinstance(value, locate(custom[
|
|
625
|
+
if not isinstance(value, locate(custom["type"])):
|
|
581
626
|
raise MongoEngineValidationError(
|
|
582
|
-
|
|
627
|
+
"Custom metadata is not of the right type."
|
|
628
|
+
)
|
|
583
629
|
if not custom_present:
|
|
584
630
|
raise MongoEngineValidationError(
|
|
585
|
-
|
|
631
|
+
"Dataset's organization did not define the requested custom metadata."
|
|
632
|
+
)
|
|
586
633
|
|
|
587
634
|
def url_for(self, *args, **kwargs):
    """Resolve the URL of this dataset through ``endpoint_for``.

    Extra positional and keyword arguments are forwarded unchanged.
    """
    endpoints = ("datasets.show", "api.dataset")
    return endpoint_for(*endpoints, *args, dataset=self, **kwargs)
|
|
589
636
|
|
|
590
637
|
display_url = property(url_for)
|
|
591
638
|
|
|
@@ -601,7 +648,7 @@ class Dataset(WithMetrics, BadgeMixin, Owned, db.Document):
|
|
|
601
648
|
def full_title(self):
    """Return the title, suffixed with the acronym in parentheses if any."""
    if self.acronym:
        return "{title} ({acronym})".format(**self._data)
    return self.title
|
|
605
652
|
|
|
606
653
|
@property
|
|
607
654
|
def external_url(self):
|
|
@@ -616,8 +663,7 @@ class Dataset(WithMetrics, BadgeMixin, Owned, db.Document):
|
|
|
616
663
|
|
|
617
664
|
@property
def frequency_label(self):
    """Return the label of the update frequency.

    A missing frequency, or one absent from ``UPDATE_FREQUENCIES``, maps
    to the label registered under ``"unknown"``.
    """
    frequency = self.frequency or "unknown"
    return UPDATE_FREQUENCIES.get(frequency, UPDATE_FREQUENCIES["unknown"])
|
|
621
667
|
|
|
622
668
|
def check_availability(self):
|
|
623
669
|
"""Check if resources from that dataset are available.
|
|
@@ -625,21 +671,28 @@ class Dataset(WithMetrics, BadgeMixin, Owned, db.Document):
|
|
|
625
671
|
Return a list of (boolean or 'unknown')
|
|
626
672
|
"""
|
|
627
673
|
# Only check remote resources.
|
|
628
|
-
remote_resources = [
|
|
629
|
-
|
|
630
|
-
|
|
674
|
+
remote_resources = [
|
|
675
|
+
resource for resource in self.resources if resource.filetype == "remote"
|
|
676
|
+
]
|
|
631
677
|
if not remote_resources:
|
|
632
678
|
return []
|
|
633
679
|
return [resource.check_availability() for resource in remote_resources]
|
|
634
680
|
|
|
635
681
|
@property
def created_at(self):
    """Return the creation date to expose for this dataset.

    The harvested ``created_at`` wins when harvest metadata exists and
    carries a truthy value; otherwise ``created_at_internal`` is returned.
    """
    harvest = self.harvest
    if harvest and harvest.created_at:
        return harvest.created_at
    return self.created_at_internal
|
|
638
688
|
|
|
639
689
|
@property
def last_modified(self):
    """Return the modification date to expose for this dataset.

    The harvested ``modified_at`` (made naive) is used when it is set and
    strictly earlier than the current UTC time; otherwise
    ``last_modified_internal`` is returned.
    """
    harvest = self.harvest
    if harvest and harvest.modified_at:
        harvested = to_naive_datetime(harvest.modified_at)
        # A harvested date in the future is ignored.
        if harvested < datetime.utcnow():
            return harvested
    return self.last_modified_internal
|
|
645
698
|
|
|
@@ -669,27 +722,27 @@ class Dataset(WithMetrics, BadgeMixin, Owned, db.Document):
|
|
|
669
722
|
every 8 hours, but is maximum 24 hours later.
|
|
670
723
|
"""
|
|
671
724
|
delta = None
|
|
672
|
-
if self.frequency ==
|
|
725
|
+
if self.frequency == "hourly":
|
|
673
726
|
delta = timedelta(hours=1)
|
|
674
|
-
elif self.frequency in [
|
|
727
|
+
elif self.frequency in ["fourTimesADay", "threeTimesADay", "semidaily", "daily"]:
|
|
675
728
|
delta = timedelta(days=1)
|
|
676
|
-
elif self.frequency in [
|
|
729
|
+
elif self.frequency in ["fourTimesAWeek", "threeTimesAWeek", "semiweekly", "weekly"]:
|
|
677
730
|
delta = timedelta(weeks=1)
|
|
678
|
-
elif self.frequency ==
|
|
731
|
+
elif self.frequency == "biweekly":
|
|
679
732
|
delta = timedelta(weeks=2)
|
|
680
|
-
elif self.frequency in [
|
|
733
|
+
elif self.frequency in ["threeTimesAMonth", "semimonthly", "monthly"]:
|
|
681
734
|
delta = timedelta(days=31)
|
|
682
|
-
elif self.frequency ==
|
|
735
|
+
elif self.frequency == "bimonthly":
|
|
683
736
|
delta = timedelta(days=31 * 2)
|
|
684
|
-
elif self.frequency ==
|
|
737
|
+
elif self.frequency == "quarterly":
|
|
685
738
|
delta = timedelta(days=365 / 4)
|
|
686
|
-
elif self.frequency in [
|
|
739
|
+
elif self.frequency in ["threeTimesAYear", "semiannual", "annual"]:
|
|
687
740
|
delta = timedelta(days=365)
|
|
688
|
-
elif self.frequency ==
|
|
741
|
+
elif self.frequency == "biennial":
|
|
689
742
|
delta = timedelta(days=365 * 2)
|
|
690
|
-
elif self.frequency ==
|
|
743
|
+
elif self.frequency == "triennial":
|
|
691
744
|
delta = timedelta(days=365 * 3)
|
|
692
|
-
elif self.frequency ==
|
|
745
|
+
elif self.frequency == "quinquennial":
|
|
693
746
|
delta = timedelta(days=365 * 5)
|
|
694
747
|
if delta is None:
|
|
695
748
|
return
|
|
@@ -711,48 +764,49 @@ class Dataset(WithMetrics, BadgeMixin, Owned, db.Document):
|
|
|
711
764
|
# Quality is only relevant on saved Datasets
|
|
712
765
|
return result
|
|
713
766
|
|
|
714
|
-
result[
|
|
715
|
-
result[
|
|
716
|
-
result[
|
|
767
|
+
result["license"] = True if self.license else False
|
|
768
|
+
result["temporal_coverage"] = True if self.temporal_coverage else False
|
|
769
|
+
result["spatial"] = True if self.spatial else False
|
|
717
770
|
|
|
718
|
-
result[
|
|
771
|
+
result["update_frequency"] = self.frequency and self.frequency != "unknown"
|
|
719
772
|
if self.next_update:
|
|
720
773
|
# Allow for being one day late on update.
|
|
721
774
|
# We may have up to one day delay due to harvesting for example
|
|
722
|
-
result[
|
|
775
|
+
result["update_fulfilled_in_time"] = (
|
|
723
776
|
True if (self.next_update - datetime.utcnow()).days >= -1 else False
|
|
724
777
|
)
|
|
725
|
-
elif self.frequency in [
|
|
778
|
+
elif self.frequency in ["continuous", "irregular", "punctual"]:
|
|
726
779
|
# For these frequencies, we don't expect regular updates or can't quantify them.
|
|
727
780
|
# Thus we consider the update_fulfilled_in_time quality criterion to be true.
|
|
728
|
-
result[
|
|
781
|
+
result["update_fulfilled_in_time"] = True
|
|
729
782
|
|
|
730
|
-
result[
|
|
731
|
-
True
|
|
783
|
+
result["dataset_description_quality"] = (
|
|
784
|
+
True
|
|
785
|
+
if len(self.description) > current_app.config.get("QUALITY_DESCRIPTION_LENGTH")
|
|
732
786
|
else False
|
|
733
787
|
)
|
|
734
788
|
|
|
735
789
|
if self.resources:
|
|
736
|
-
result[
|
|
737
|
-
result[
|
|
738
|
-
resource.closed_or_no_format for resource in self.resources
|
|
739
|
-
|
|
790
|
+
result["has_resources"] = True
|
|
791
|
+
result["has_open_format"] = not all(
|
|
792
|
+
resource.closed_or_no_format for resource in self.resources
|
|
793
|
+
)
|
|
794
|
+
result["all_resources_available"] = all(self.check_availability())
|
|
740
795
|
resource_doc = False
|
|
741
796
|
resource_desc = False
|
|
742
797
|
for resource in self.resources:
|
|
743
|
-
if resource.type ==
|
|
798
|
+
if resource.type == "documentation":
|
|
744
799
|
resource_doc = True
|
|
745
800
|
if resource.description:
|
|
746
801
|
resource_desc = True
|
|
747
|
-
result[
|
|
802
|
+
result["resources_documentation"] = resource_doc or resource_desc
|
|
748
803
|
|
|
749
|
-
result[
|
|
804
|
+
result["score"] = self.compute_quality_score(result)
|
|
750
805
|
return result
|
|
751
|
-
|
|
806
|
+
|
|
752
807
|
@property
def downloads(self):
    """Return the sum of the ``views`` metric over all resources.

    A resource without a ``views`` metric counts as 0.
    """
    total = 0
    for resource in self.resources:
        total += resource.metrics.get("views", 0)
    return total
|
|
756
810
|
|
|
757
811
|
@staticmethod
|
|
758
812
|
def normalize_score(score):
|
|
@@ -772,25 +826,25 @@ class Dataset(WithMetrics, BadgeMixin, Owned, db.Document):
|
|
|
772
826
|
"""
|
|
773
827
|
score = 0
|
|
774
828
|
UNIT = 1
|
|
775
|
-
if quality[
|
|
829
|
+
if quality["license"]:
|
|
776
830
|
score += UNIT
|
|
777
|
-
if quality[
|
|
831
|
+
if quality["temporal_coverage"]:
|
|
778
832
|
score += UNIT
|
|
779
|
-
if quality[
|
|
833
|
+
if quality["spatial"]:
|
|
780
834
|
score += UNIT
|
|
781
|
-
if quality[
|
|
835
|
+
if quality["update_frequency"]:
|
|
782
836
|
score += UNIT
|
|
783
|
-
if
|
|
784
|
-
if quality[
|
|
837
|
+
if "update_fulfilled_in_time" in quality:
|
|
838
|
+
if quality["update_fulfilled_in_time"]:
|
|
785
839
|
score += UNIT
|
|
786
|
-
if quality[
|
|
840
|
+
if quality["dataset_description_quality"]:
|
|
787
841
|
score += UNIT
|
|
788
|
-
if
|
|
789
|
-
if quality[
|
|
842
|
+
if "has_resources" in quality:
|
|
843
|
+
if quality["has_open_format"]:
|
|
790
844
|
score += UNIT
|
|
791
|
-
if quality[
|
|
845
|
+
if quality["all_resources_available"]:
|
|
792
846
|
score += UNIT
|
|
793
|
-
if quality[
|
|
847
|
+
if quality["resources_documentation"]:
|
|
794
848
|
score += UNIT
|
|
795
849
|
return self.normalize_score(score)
|
|
796
850
|
|
|
@@ -800,25 +854,18 @@ class Dataset(WithMetrics, BadgeMixin, Owned, db.Document):
|
|
|
800
854
|
return obj or cls.objects.get_or_404(id=id_or_slug)
|
|
801
855
|
|
|
802
856
|
def add_resource(self, resource):
    """Perform an atomic prepend for a new resource.

    The resource is validated, pushed at position 0 of ``resources`` with
    a raw ``$push`` update, then the document is reloaded and
    ``on_resource_added`` is sent.
    """
    resource.validate()
    prepend = {"$each": [resource.to_mongo()], "$position": 0}
    self.update(__raw__={"$push": {"resources": prepend}})
    self.reload()
    self.on_resource_added.send(
        self.__class__,
        document=self,
        resource_id=resource.id,
    )
|
|
815
864
|
|
|
816
865
|
def update_resource(self, resource):
    """Perform an atomic update for an existing resource.

    The resource is located in ``resources``, written back at that index
    through ``update``, then the document is reloaded and
    ``on_resource_updated`` is sent.
    """
    position = self.resources.index(resource)
    field_name = "resources__{index}".format(index=position)
    self.update(**{field_name: resource})
    self.reload()
    self.on_resource_updated.send(
        self.__class__,
        document=self,
        resource_id=resource.id,
    )
|
|
@@ -838,29 +885,26 @@ class Dataset(WithMetrics, BadgeMixin, Owned, db.Document):
|
|
|
838
885
|
@cached_property
|
|
839
886
|
def json_ld(self):
|
|
840
887
|
result = {
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
|
|
850
|
-
|
|
888
|
+
"@context": "http://schema.org",
|
|
889
|
+
"@type": "Dataset",
|
|
890
|
+
"@id": str(self.id),
|
|
891
|
+
"alternateName": self.slug,
|
|
892
|
+
"dateCreated": self.created_at.isoformat(),
|
|
893
|
+
"dateModified": self.last_modified.isoformat(),
|
|
894
|
+
"url": endpoint_for("datasets.show", "api.dataset", dataset=self, _external=True),
|
|
895
|
+
"name": self.title,
|
|
896
|
+
"keywords": ",".join(self.tags),
|
|
897
|
+
"distribution": [resource.json_ld for resource in self.resources],
|
|
851
898
|
# Theses values are not standard
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
],
|
|
855
|
-
'extras': [get_json_ld_extra(*item)
|
|
856
|
-
for item in self.extras.items()],
|
|
899
|
+
"contributedDistribution": [resource.json_ld for resource in self.community_resources],
|
|
900
|
+
"extras": [get_json_ld_extra(*item) for item in self.extras.items()],
|
|
857
901
|
}
|
|
858
902
|
|
|
859
903
|
if self.description:
|
|
860
|
-
result[
|
|
904
|
+
result["description"] = mdstrip(self.description)
|
|
861
905
|
|
|
862
906
|
if self.license and self.license.url:
|
|
863
|
-
result[
|
|
907
|
+
result["license"] = self.license.url
|
|
864
908
|
|
|
865
909
|
if self.organization:
|
|
866
910
|
author = self.organization.json_ld
|
|
@@ -870,34 +914,37 @@ class Dataset(WithMetrics, BadgeMixin, Owned, db.Document):
|
|
|
870
914
|
author = None
|
|
871
915
|
|
|
872
916
|
if author:
|
|
873
|
-
result[
|
|
917
|
+
result["author"] = author
|
|
874
918
|
|
|
875
919
|
return result
|
|
876
920
|
|
|
877
921
|
@property
def internal(self):
    """Return the internally tracked timestamps of this dataset.

    The mapping holds ``created_at_internal`` and ``last_modified_internal``
    exactly as they are stored on the instance.
    """
    internal_dates = {}
    internal_dates["created_at_internal"] = self.created_at_internal
    internal_dates["last_modified_internal"] = self.last_modified_internal
    return internal_dates
|
|
883
927
|
|
|
884
928
|
@property
def views_count(self):
    """Return the ``views`` metric, or 0 when it has not been recorded."""
    views = self.metrics.get("views", 0)
    return views
|
|
887
931
|
|
|
888
932
|
def count_discussions(self):
    """Recompute the ``discussions`` metric and save the dataset.

    Counts the discussions whose subject is this dataset and whose
    ``closed`` field is ``None``.
    """
    # Imported locally, as in the original code.
    from udata.models import Discussion

    matching = Discussion.objects(subject=self, closed=None)
    self.metrics["discussions"] = matching.count()
    self.save()
|
|
892
937
|
|
|
893
938
|
def count_reuses(self):
    """Recompute the ``reuses`` metric and save the dataset.

    Counts the reuses referencing this dataset, restricted by the
    queryset's ``visible()`` filter.
    """
    # Imported locally, as in the original code.
    from udata.models import Reuse

    matching = Reuse.objects(datasets=self).visible()
    self.metrics["reuses"] = matching.count()
    self.save()
|
|
897
943
|
|
|
898
944
|
def count_followers(self):
    """Recompute the ``followers`` metric and save the dataset.

    Counts the follows whose ``until`` is ``None``, narrowed to this
    dataset by the queryset's ``followers()`` helper.
    """
    # Imported locally, as in the original code.
    from udata.models import Follow

    matching = Follow.objects(until=None).followers(self)
    self.metrics["followers"] = matching.count()
    self.save()
|
|
902
949
|
|
|
903
950
|
|
|
@@ -906,84 +953,93 @@ post_save.connect(Dataset.post_save, sender=Dataset)
|
|
|
906
953
|
|
|
907
954
|
|
|
908
955
|
class CommunityResource(ResourceMixin, WithMetrics, Owned, db.Document):
    """
    Resource (local file, remote file or API) added by the community of
    users to the original dataset.
    """

    # Dataset this resource is attached to, declared with the NULLIFY
    # reverse-delete rule.
    dataset = db.ReferenceField(Dataset, reverse_delete_rule=db.NULLIFY)

    __metrics_keys__ = ["views"]

    meta = dict(
        ordering=["-created_at_internal"],
        queryset_class=OwnedQuerySet,
    )

    @property
    def from_community(self):
        """Always ``True`` for this class."""
        return True
|
|
927
975
|
|
|
976
|
+
|
|
928
977
|
class ResourceSchema(object):
    """Namespace of helpers reading the schema catalog.

    All helpers are static: they are called on the class and never use
    instance state.
    """

    @staticmethod
    @cache.memoize(timeout=SCHEMA_CACHE_DURATION)
    def all():
        """
        Get a list of schemas from a schema catalog endpoint.

        This has a double layer of cache:
        - the @cache.memoize decorator w/ a short-lived cache for normal operations
        - a long-term cache w/o timeout to be able to always render some content

        Returns an empty list when ``SCHEMA_CATALOG_URL`` is not configured.

        Raises:
            SchemasCatalogNotFoundException: the endpoint answered 404.
            SchemasCacheUnavailableException: neither the endpoint nor the
                long-term cache provided any schema.
        """
        endpoint = current_app.config.get("SCHEMA_CATALOG_URL")
        if endpoint is None:
            return []

        cache_key = "schema-catalog-objects"
        try:
            response = requests.get(endpoint, timeout=5)
            # do not cache 404 and forward status code
            if response.status_code == 404:
                raise SchemasCatalogNotFoundException(
                    f"Schemas catalog does not exist at {endpoint}"
                )
            response.raise_for_status()
        except requests.exceptions.RequestException:
            # Request failed: fall back on the long-term cached copy.
            log.exception(f"Error while getting schema catalog from {endpoint}")
            schemas = cache.get(cache_key)
        else:
            schemas = response.json().get("schemas", [])
            cache.set(cache_key, schemas)
        # no cached version or no content
        if not schemas:
            log.error("No content found inc. from cache for schema catalog")
            raise SchemasCacheUnavailableException("No content in cache for schema catalog")

        return schemas

    @staticmethod
    def assignable_schemas():
        """Return the catalog schemas whose ``schema_type`` is assignable.

        Schemas whose type is listed in ``NON_ASSIGNABLE_SCHEMA_TYPES`` are
        filtered out.
        """
        return [
            s
            for s in ResourceSchema.all()
            if s.get("schema_type") not in NON_ASSIGNABLE_SCHEMA_TYPES
        ]

    @staticmethod
    def get_existing_schema_info_by_url(url: str) -> tuple[str, str | None] | None:
        """
        Returns the name and the version if exists

        The versions of each schema are checked first, then the schema's
        main URL. ``None`` is returned when no schema matches ``url``.
        """
        for schema in ResourceSchema.all():
            for version in schema["versions"]:
                if version["schema_url"] == url:
                    return schema["name"], version["version_name"]

            if schema["schema_url"] == url:
                # The main schema URL is often the 'latest' version but
                # not sure if it's mandatory everywhere so set the version to
                # None here.
                return schema["name"], None

        return None
|
|
982
|
-
|
|
1037
|
+
|
|
1038
|
+
|
|
983
1039
|
def get_resource(id):
    """Fetch a resource given its UUID.

    The resource is first searched among the resources embedded in
    datasets; when no dataset holds it, community resources are queried.
    """
    dataset = Dataset.objects(resources__id=id).first()
    if not dataset:
        return CommunityResource.objects(id=id).first()
    return get_by(dataset.resources, "id", id)
|