udata 9.1.2.dev30355__py2.py3-none-any.whl → 9.1.2.dev30454__py2.py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of udata might be problematic. Click here for more details.

Files changed (413) hide show
  1. tasks/__init__.py +109 -107
  2. tasks/helpers.py +18 -18
  3. udata/__init__.py +4 -4
  4. udata/admin/views.py +5 -5
  5. udata/api/__init__.py +111 -134
  6. udata/api/commands.py +45 -37
  7. udata/api/errors.py +5 -4
  8. udata/api/fields.py +23 -21
  9. udata/api/oauth2.py +55 -74
  10. udata/api/parsers.py +15 -15
  11. udata/api/signals.py +1 -1
  12. udata/api_fields.py +137 -89
  13. udata/app.py +58 -55
  14. udata/assets.py +5 -5
  15. udata/auth/__init__.py +37 -26
  16. udata/auth/forms.py +23 -15
  17. udata/auth/helpers.py +1 -1
  18. udata/auth/mails.py +3 -3
  19. udata/auth/password_validation.py +19 -15
  20. udata/auth/views.py +94 -68
  21. udata/commands/__init__.py +71 -69
  22. udata/commands/cache.py +7 -7
  23. udata/commands/db.py +201 -140
  24. udata/commands/dcat.py +36 -30
  25. udata/commands/fixtures.py +100 -84
  26. udata/commands/images.py +21 -20
  27. udata/commands/info.py +17 -20
  28. udata/commands/init.py +10 -10
  29. udata/commands/purge.py +12 -13
  30. udata/commands/serve.py +41 -29
  31. udata/commands/static.py +16 -18
  32. udata/commands/test.py +20 -20
  33. udata/commands/tests/fixtures.py +26 -24
  34. udata/commands/worker.py +31 -33
  35. udata/core/__init__.py +12 -12
  36. udata/core/activity/__init__.py +0 -1
  37. udata/core/activity/api.py +59 -49
  38. udata/core/activity/models.py +28 -26
  39. udata/core/activity/signals.py +1 -1
  40. udata/core/activity/tasks.py +16 -10
  41. udata/core/badges/api.py +6 -6
  42. udata/core/badges/commands.py +14 -13
  43. udata/core/badges/fields.py +8 -5
  44. udata/core/badges/forms.py +7 -4
  45. udata/core/badges/models.py +16 -31
  46. udata/core/badges/permissions.py +1 -3
  47. udata/core/badges/signals.py +2 -2
  48. udata/core/badges/tasks.py +3 -2
  49. udata/core/badges/tests/test_commands.py +10 -10
  50. udata/core/badges/tests/test_model.py +24 -31
  51. udata/core/contact_point/api.py +19 -18
  52. udata/core/contact_point/api_fields.py +21 -14
  53. udata/core/contact_point/factories.py +2 -2
  54. udata/core/contact_point/forms.py +7 -6
  55. udata/core/contact_point/models.py +3 -5
  56. udata/core/dataservices/api.py +26 -21
  57. udata/core/dataservices/factories.py +13 -11
  58. udata/core/dataservices/models.py +35 -40
  59. udata/core/dataservices/permissions.py +4 -4
  60. udata/core/dataservices/rdf.py +40 -17
  61. udata/core/dataservices/tasks.py +4 -3
  62. udata/core/dataset/actions.py +10 -10
  63. udata/core/dataset/activities.py +21 -23
  64. udata/core/dataset/api.py +321 -298
  65. udata/core/dataset/api_fields.py +443 -271
  66. udata/core/dataset/apiv2.py +305 -229
  67. udata/core/dataset/commands.py +38 -36
  68. udata/core/dataset/constants.py +61 -54
  69. udata/core/dataset/csv.py +70 -74
  70. udata/core/dataset/events.py +39 -32
  71. udata/core/dataset/exceptions.py +8 -4
  72. udata/core/dataset/factories.py +57 -65
  73. udata/core/dataset/forms.py +87 -63
  74. udata/core/dataset/models.py +336 -280
  75. udata/core/dataset/permissions.py +9 -6
  76. udata/core/dataset/preview.py +15 -17
  77. udata/core/dataset/rdf.py +156 -122
  78. udata/core/dataset/search.py +92 -77
  79. udata/core/dataset/signals.py +1 -1
  80. udata/core/dataset/tasks.py +63 -54
  81. udata/core/discussions/actions.py +5 -5
  82. udata/core/discussions/api.py +124 -120
  83. udata/core/discussions/factories.py +2 -2
  84. udata/core/discussions/forms.py +9 -7
  85. udata/core/discussions/metrics.py +1 -3
  86. udata/core/discussions/models.py +25 -24
  87. udata/core/discussions/notifications.py +18 -14
  88. udata/core/discussions/permissions.py +3 -3
  89. udata/core/discussions/signals.py +4 -4
  90. udata/core/discussions/tasks.py +24 -28
  91. udata/core/followers/api.py +32 -33
  92. udata/core/followers/models.py +9 -9
  93. udata/core/followers/signals.py +3 -3
  94. udata/core/jobs/actions.py +7 -7
  95. udata/core/jobs/api.py +99 -92
  96. udata/core/jobs/commands.py +48 -49
  97. udata/core/jobs/forms.py +11 -11
  98. udata/core/jobs/models.py +6 -6
  99. udata/core/metrics/__init__.py +2 -2
  100. udata/core/metrics/commands.py +34 -30
  101. udata/core/metrics/models.py +2 -4
  102. udata/core/metrics/signals.py +1 -1
  103. udata/core/metrics/tasks.py +3 -3
  104. udata/core/organization/activities.py +12 -15
  105. udata/core/organization/api.py +167 -174
  106. udata/core/organization/api_fields.py +183 -124
  107. udata/core/organization/apiv2.py +32 -32
  108. udata/core/organization/commands.py +20 -22
  109. udata/core/organization/constants.py +11 -11
  110. udata/core/organization/csv.py +17 -15
  111. udata/core/organization/factories.py +8 -11
  112. udata/core/organization/forms.py +32 -26
  113. udata/core/organization/metrics.py +2 -1
  114. udata/core/organization/models.py +87 -67
  115. udata/core/organization/notifications.py +18 -14
  116. udata/core/organization/permissions.py +10 -11
  117. udata/core/organization/rdf.py +14 -14
  118. udata/core/organization/search.py +30 -28
  119. udata/core/organization/signals.py +7 -7
  120. udata/core/organization/tasks.py +42 -61
  121. udata/core/owned.py +38 -27
  122. udata/core/post/api.py +82 -81
  123. udata/core/post/constants.py +8 -5
  124. udata/core/post/factories.py +4 -4
  125. udata/core/post/forms.py +13 -14
  126. udata/core/post/models.py +20 -22
  127. udata/core/post/tests/test_api.py +30 -32
  128. udata/core/reports/api.py +8 -7
  129. udata/core/reports/constants.py +1 -3
  130. udata/core/reports/models.py +10 -10
  131. udata/core/reuse/activities.py +15 -19
  132. udata/core/reuse/api.py +123 -126
  133. udata/core/reuse/api_fields.py +120 -85
  134. udata/core/reuse/apiv2.py +11 -10
  135. udata/core/reuse/constants.py +23 -23
  136. udata/core/reuse/csv.py +18 -18
  137. udata/core/reuse/factories.py +5 -9
  138. udata/core/reuse/forms.py +24 -21
  139. udata/core/reuse/models.py +55 -51
  140. udata/core/reuse/permissions.py +2 -2
  141. udata/core/reuse/search.py +49 -46
  142. udata/core/reuse/signals.py +1 -1
  143. udata/core/reuse/tasks.py +4 -5
  144. udata/core/site/api.py +47 -50
  145. udata/core/site/factories.py +2 -2
  146. udata/core/site/forms.py +4 -5
  147. udata/core/site/models.py +94 -63
  148. udata/core/site/rdf.py +14 -14
  149. udata/core/spam/api.py +16 -9
  150. udata/core/spam/constants.py +4 -4
  151. udata/core/spam/fields.py +13 -7
  152. udata/core/spam/models.py +27 -20
  153. udata/core/spam/signals.py +1 -1
  154. udata/core/spam/tests/test_spam.py +6 -5
  155. udata/core/spatial/api.py +72 -80
  156. udata/core/spatial/api_fields.py +73 -58
  157. udata/core/spatial/commands.py +67 -64
  158. udata/core/spatial/constants.py +3 -3
  159. udata/core/spatial/factories.py +37 -54
  160. udata/core/spatial/forms.py +27 -26
  161. udata/core/spatial/geoids.py +17 -17
  162. udata/core/spatial/models.py +43 -47
  163. udata/core/spatial/tasks.py +2 -1
  164. udata/core/spatial/tests/test_api.py +115 -130
  165. udata/core/spatial/tests/test_fields.py +74 -77
  166. udata/core/spatial/tests/test_geoid.py +22 -22
  167. udata/core/spatial/tests/test_models.py +5 -7
  168. udata/core/spatial/translations.py +16 -16
  169. udata/core/storages/__init__.py +16 -18
  170. udata/core/storages/api.py +66 -64
  171. udata/core/storages/tasks.py +7 -7
  172. udata/core/storages/utils.py +15 -15
  173. udata/core/storages/views.py +5 -6
  174. udata/core/tags/api.py +17 -14
  175. udata/core/tags/csv.py +4 -4
  176. udata/core/tags/models.py +8 -5
  177. udata/core/tags/tasks.py +11 -13
  178. udata/core/tags/views.py +4 -4
  179. udata/core/topic/api.py +84 -73
  180. udata/core/topic/apiv2.py +157 -127
  181. udata/core/topic/factories.py +3 -4
  182. udata/core/topic/forms.py +12 -14
  183. udata/core/topic/models.py +14 -19
  184. udata/core/topic/parsers.py +26 -26
  185. udata/core/user/activities.py +30 -29
  186. udata/core/user/api.py +151 -152
  187. udata/core/user/api_fields.py +132 -100
  188. udata/core/user/apiv2.py +7 -7
  189. udata/core/user/commands.py +38 -38
  190. udata/core/user/factories.py +8 -9
  191. udata/core/user/forms.py +14 -11
  192. udata/core/user/metrics.py +2 -2
  193. udata/core/user/models.py +68 -69
  194. udata/core/user/permissions.py +4 -5
  195. udata/core/user/rdf.py +7 -8
  196. udata/core/user/tasks.py +2 -2
  197. udata/core/user/tests/test_user_model.py +24 -16
  198. udata/cors.py +99 -0
  199. udata/db/tasks.py +2 -1
  200. udata/entrypoints.py +35 -31
  201. udata/errors.py +2 -1
  202. udata/event/values.py +6 -6
  203. udata/factories.py +2 -2
  204. udata/features/identicon/api.py +5 -6
  205. udata/features/identicon/backends.py +48 -55
  206. udata/features/identicon/tests/test_backends.py +4 -5
  207. udata/features/notifications/__init__.py +0 -1
  208. udata/features/notifications/actions.py +9 -9
  209. udata/features/notifications/api.py +17 -13
  210. udata/features/territories/__init__.py +12 -10
  211. udata/features/territories/api.py +14 -15
  212. udata/features/territories/models.py +23 -28
  213. udata/features/transfer/actions.py +8 -11
  214. udata/features/transfer/api.py +84 -77
  215. udata/features/transfer/factories.py +2 -1
  216. udata/features/transfer/models.py +11 -12
  217. udata/features/transfer/notifications.py +19 -15
  218. udata/features/transfer/permissions.py +5 -5
  219. udata/forms/__init__.py +5 -2
  220. udata/forms/fields.py +164 -172
  221. udata/forms/validators.py +19 -22
  222. udata/forms/widgets.py +9 -13
  223. udata/frontend/__init__.py +31 -26
  224. udata/frontend/csv.py +68 -58
  225. udata/frontend/markdown.py +40 -44
  226. udata/harvest/actions.py +89 -77
  227. udata/harvest/api.py +294 -238
  228. udata/harvest/backends/__init__.py +4 -4
  229. udata/harvest/backends/base.py +128 -111
  230. udata/harvest/backends/dcat.py +80 -66
  231. udata/harvest/commands.py +56 -60
  232. udata/harvest/csv.py +8 -8
  233. udata/harvest/exceptions.py +6 -3
  234. udata/harvest/filters.py +24 -23
  235. udata/harvest/forms.py +27 -28
  236. udata/harvest/models.py +88 -80
  237. udata/harvest/notifications.py +15 -10
  238. udata/harvest/signals.py +13 -13
  239. udata/harvest/tasks.py +11 -10
  240. udata/harvest/tests/factories.py +23 -24
  241. udata/harvest/tests/test_actions.py +136 -166
  242. udata/harvest/tests/test_api.py +220 -214
  243. udata/harvest/tests/test_base_backend.py +117 -112
  244. udata/harvest/tests/test_dcat_backend.py +380 -308
  245. udata/harvest/tests/test_filters.py +33 -22
  246. udata/harvest/tests/test_models.py +11 -14
  247. udata/harvest/tests/test_notifications.py +6 -7
  248. udata/harvest/tests/test_tasks.py +7 -6
  249. udata/i18n.py +237 -78
  250. udata/linkchecker/backends.py +5 -11
  251. udata/linkchecker/checker.py +23 -22
  252. udata/linkchecker/commands.py +4 -6
  253. udata/linkchecker/models.py +6 -6
  254. udata/linkchecker/tasks.py +18 -20
  255. udata/mail.py +21 -21
  256. udata/migrations/2020-07-24-remove-s-from-scope-oauth.py +9 -8
  257. udata/migrations/2020-08-24-add-fs-filename.py +9 -8
  258. udata/migrations/2020-09-28-update-reuses-datasets-metrics.py +5 -4
  259. udata/migrations/2020-10-16-migrate-ods-resources.py +9 -10
  260. udata/migrations/2021-04-08-update-schema-with-new-structure.py +8 -7
  261. udata/migrations/2021-05-27-fix-default-schema-name.py +7 -6
  262. udata/migrations/2021-07-05-remove-unused-badges.py +17 -15
  263. udata/migrations/2021-07-07-update-schema-for-community-resources.py +7 -6
  264. udata/migrations/2021-08-17-follow-integrity.py +5 -4
  265. udata/migrations/2021-08-17-harvest-integrity.py +13 -12
  266. udata/migrations/2021-08-17-oauth2client-integrity.py +5 -4
  267. udata/migrations/2021-08-17-transfer-integrity.py +5 -4
  268. udata/migrations/2021-08-17-users-integrity.py +9 -8
  269. udata/migrations/2021-12-14-reuse-topics.py +7 -6
  270. udata/migrations/2022-04-21-improve-extension-detection.py +8 -7
  271. udata/migrations/2022-09-22-clean-inactive-harvest-datasets.py +16 -14
  272. udata/migrations/2022-10-10-add-fs_uniquifier-to-user-model.py +6 -6
  273. udata/migrations/2022-10-10-migrate-harvest-extras.py +36 -26
  274. udata/migrations/2023-02-08-rename-internal-dates.py +46 -28
  275. udata/migrations/2024-01-29-fix-reuse-and-dataset-with-private-None.py +10 -8
  276. udata/migrations/2024-03-22-migrate-activity-kwargs-to-extras.py +6 -4
  277. udata/migrations/2024-06-11-fix-reuse-datasets-references.py +7 -6
  278. udata/migrations/__init__.py +123 -105
  279. udata/models/__init__.py +4 -4
  280. udata/mongo/__init__.py +13 -11
  281. udata/mongo/badges_field.py +3 -2
  282. udata/mongo/datetime_fields.py +13 -12
  283. udata/mongo/document.py +17 -16
  284. udata/mongo/engine.py +15 -16
  285. udata/mongo/errors.py +2 -1
  286. udata/mongo/extras_fields.py +30 -20
  287. udata/mongo/queryset.py +12 -12
  288. udata/mongo/slug_fields.py +38 -28
  289. udata/mongo/taglist_field.py +1 -2
  290. udata/mongo/url_field.py +5 -5
  291. udata/mongo/uuid_fields.py +4 -3
  292. udata/notifications/__init__.py +1 -1
  293. udata/notifications/mattermost.py +10 -9
  294. udata/rdf.py +167 -188
  295. udata/routing.py +40 -45
  296. udata/search/__init__.py +18 -19
  297. udata/search/adapter.py +17 -16
  298. udata/search/commands.py +44 -51
  299. udata/search/fields.py +13 -20
  300. udata/search/query.py +23 -18
  301. udata/search/result.py +9 -10
  302. udata/sentry.py +21 -19
  303. udata/settings.py +262 -198
  304. udata/sitemap.py +8 -6
  305. udata/storage/s3.py +20 -13
  306. udata/tags.py +4 -5
  307. udata/tasks.py +43 -42
  308. udata/tests/__init__.py +9 -6
  309. udata/tests/api/__init__.py +8 -6
  310. udata/tests/api/test_auth_api.py +395 -321
  311. udata/tests/api/test_base_api.py +33 -35
  312. udata/tests/api/test_contact_points.py +7 -9
  313. udata/tests/api/test_dataservices_api.py +211 -158
  314. udata/tests/api/test_datasets_api.py +823 -812
  315. udata/tests/api/test_follow_api.py +13 -15
  316. udata/tests/api/test_me_api.py +95 -112
  317. udata/tests/api/test_organizations_api.py +301 -339
  318. udata/tests/api/test_reports_api.py +35 -25
  319. udata/tests/api/test_reuses_api.py +134 -139
  320. udata/tests/api/test_swagger.py +5 -5
  321. udata/tests/api/test_tags_api.py +18 -25
  322. udata/tests/api/test_topics_api.py +94 -94
  323. udata/tests/api/test_transfer_api.py +53 -48
  324. udata/tests/api/test_user_api.py +128 -141
  325. udata/tests/apiv2/test_datasets.py +290 -198
  326. udata/tests/apiv2/test_me_api.py +10 -11
  327. udata/tests/apiv2/test_organizations.py +56 -74
  328. udata/tests/apiv2/test_swagger.py +5 -5
  329. udata/tests/apiv2/test_topics.py +69 -87
  330. udata/tests/cli/test_cli_base.py +8 -8
  331. udata/tests/cli/test_db_cli.py +21 -19
  332. udata/tests/dataservice/test_dataservice_tasks.py +8 -12
  333. udata/tests/dataset/test_csv_adapter.py +44 -35
  334. udata/tests/dataset/test_dataset_actions.py +2 -3
  335. udata/tests/dataset/test_dataset_commands.py +7 -8
  336. udata/tests/dataset/test_dataset_events.py +36 -29
  337. udata/tests/dataset/test_dataset_model.py +224 -217
  338. udata/tests/dataset/test_dataset_rdf.py +142 -131
  339. udata/tests/dataset/test_dataset_tasks.py +15 -15
  340. udata/tests/dataset/test_resource_preview.py +10 -13
  341. udata/tests/features/territories/__init__.py +9 -13
  342. udata/tests/features/territories/test_territories_api.py +71 -91
  343. udata/tests/forms/test_basic_fields.py +7 -7
  344. udata/tests/forms/test_current_user_field.py +39 -66
  345. udata/tests/forms/test_daterange_field.py +31 -39
  346. udata/tests/forms/test_dict_field.py +28 -26
  347. udata/tests/forms/test_extras_fields.py +102 -76
  348. udata/tests/forms/test_form_field.py +8 -8
  349. udata/tests/forms/test_image_field.py +33 -26
  350. udata/tests/forms/test_model_field.py +134 -123
  351. udata/tests/forms/test_model_list_field.py +7 -7
  352. udata/tests/forms/test_nested_model_list_field.py +117 -79
  353. udata/tests/forms/test_publish_as_field.py +36 -65
  354. udata/tests/forms/test_reference_field.py +34 -53
  355. udata/tests/forms/test_user_forms.py +23 -21
  356. udata/tests/forms/test_uuid_field.py +6 -10
  357. udata/tests/frontend/__init__.py +9 -6
  358. udata/tests/frontend/test_auth.py +7 -6
  359. udata/tests/frontend/test_csv.py +81 -96
  360. udata/tests/frontend/test_hooks.py +43 -43
  361. udata/tests/frontend/test_markdown.py +211 -191
  362. udata/tests/helpers.py +32 -37
  363. udata/tests/models.py +2 -2
  364. udata/tests/organization/test_csv_adapter.py +21 -16
  365. udata/tests/organization/test_notifications.py +11 -18
  366. udata/tests/organization/test_organization_model.py +13 -13
  367. udata/tests/organization/test_organization_rdf.py +29 -22
  368. udata/tests/organization/test_organization_tasks.py +16 -17
  369. udata/tests/plugin.py +79 -73
  370. udata/tests/reuse/test_reuse_model.py +21 -21
  371. udata/tests/reuse/test_reuse_task.py +11 -13
  372. udata/tests/search/__init__.py +11 -12
  373. udata/tests/search/test_adapter.py +60 -70
  374. udata/tests/search/test_query.py +16 -16
  375. udata/tests/search/test_results.py +10 -7
  376. udata/tests/site/test_site_api.py +11 -16
  377. udata/tests/site/test_site_metrics.py +20 -30
  378. udata/tests/site/test_site_model.py +4 -5
  379. udata/tests/site/test_site_rdf.py +94 -78
  380. udata/tests/test_activity.py +17 -17
  381. udata/tests/test_cors.py +62 -0
  382. udata/tests/test_discussions.py +292 -299
  383. udata/tests/test_i18n.py +37 -40
  384. udata/tests/test_linkchecker.py +91 -85
  385. udata/tests/test_mail.py +13 -17
  386. udata/tests/test_migrations.py +219 -180
  387. udata/tests/test_model.py +164 -157
  388. udata/tests/test_notifications.py +17 -17
  389. udata/tests/test_owned.py +14 -14
  390. udata/tests/test_rdf.py +25 -23
  391. udata/tests/test_routing.py +89 -93
  392. udata/tests/test_storages.py +137 -128
  393. udata/tests/test_tags.py +44 -46
  394. udata/tests/test_topics.py +7 -7
  395. udata/tests/test_transfer.py +42 -49
  396. udata/tests/test_uris.py +160 -161
  397. udata/tests/test_utils.py +79 -71
  398. udata/tests/user/test_user_rdf.py +5 -9
  399. udata/tests/workers/test_jobs_commands.py +57 -58
  400. udata/tests/workers/test_tasks_routing.py +23 -29
  401. udata/tests/workers/test_workers_api.py +125 -131
  402. udata/tests/workers/test_workers_helpers.py +6 -6
  403. udata/tracking.py +4 -6
  404. udata/uris.py +45 -46
  405. udata/utils.py +68 -66
  406. udata/wsgi.py +1 -1
  407. {udata-9.1.2.dev30355.dist-info → udata-9.1.2.dev30454.dist-info}/METADATA +7 -3
  408. udata-9.1.2.dev30454.dist-info/RECORD +706 -0
  409. udata-9.1.2.dev30355.dist-info/RECORD +0 -704
  410. {udata-9.1.2.dev30355.dist-info → udata-9.1.2.dev30454.dist-info}/LICENSE +0 -0
  411. {udata-9.1.2.dev30355.dist-info → udata-9.1.2.dev30454.dist-info}/WHEEL +0 -0
  412. {udata-9.1.2.dev30355.dist-info → udata-9.1.2.dev30454.dist-info}/entry_points.txt +0 -0
  413. {udata-9.1.2.dev30355.dist-info → udata-9.1.2.dev30454.dist-info}/top_level.txt +0 -0
udata/core/dataset/rdf.py CHANGED
@@ -1,47 +1,67 @@
1
- '''
1
+ """
2
2
  This module centralizes dataset helpers for RDF/DCAT serialization and parsing
3
- '''
3
+ """
4
+
4
5
  import calendar
5
6
  import json
6
7
  import logging
7
-
8
8
  from datetime import date
9
+
9
10
  from dateutil.parser import parse as parse_dt
10
11
  from flask import current_app
11
12
  from geomet import wkt
12
- from rdflib import Graph, URIRef, Literal, BNode
13
- from rdflib.resource import Resource as RdfResource
14
- from rdflib.namespace import RDF
15
13
  from mongoengine.errors import ValidationError
14
+ from rdflib import BNode, Graph, Literal, URIRef
15
+ from rdflib.namespace import RDF
16
+ from rdflib.resource import Resource as RdfResource
16
17
 
17
18
  from udata import i18n, uris
18
- from udata.core.spatial.models import SpatialCoverage
19
19
  from udata.core.dataset.models import HarvestDatasetMetadata, HarvestResourceMetadata
20
+ from udata.core.spatial.models import SpatialCoverage
20
21
  from udata.harvest.exceptions import HarvestSkipException
21
22
  from udata.models import db
22
23
  from udata.rdf import (
23
- DCAT, DCATAP, DCT, FREQ, SCV, SKOS, SPDX, SCHEMA, EUFREQ, EUFORMAT, IANAFORMAT, TAG_TO_EU_HVD_CATEGORIES, RDFS,
24
- namespace_manager, rdf_value, remote_url_from_rdf, sanitize_html, schema_from_rdf, themes_from_rdf, url_from_rdf, HVD_LEGISLATION,
24
+ DCAT,
25
+ DCATAP,
26
+ DCT,
27
+ EUFORMAT,
28
+ EUFREQ,
29
+ FREQ,
30
+ HVD_LEGISLATION,
31
+ IANAFORMAT,
32
+ RDFS,
33
+ SCHEMA,
34
+ SCV,
35
+ SKOS,
36
+ SPDX,
37
+ TAG_TO_EU_HVD_CATEGORIES,
25
38
  contact_point_from_rdf,
39
+ namespace_manager,
40
+ rdf_value,
41
+ remote_url_from_rdf,
42
+ sanitize_html,
43
+ schema_from_rdf,
44
+ themes_from_rdf,
45
+ url_from_rdf,
26
46
  )
27
- from udata.utils import get_by, safe_unicode
28
47
  from udata.uris import endpoint_for
48
+ from udata.utils import get_by, safe_unicode
29
49
 
30
- from .models import Dataset, Resource, Checksum, License
31
50
  from .constants import UPDATE_FREQUENCIES
51
+ from .models import Checksum, Dataset, License, Resource
32
52
 
33
53
  log = logging.getLogger(__name__)
34
54
 
35
55
  # Map extra frequencies (ie. not defined in Dublin Core) to closest equivalent
36
56
  RDF_FREQUENCIES = {
37
- 'punctual': None,
38
- 'hourly': FREQ.continuous,
39
- 'fourTimesADay': FREQ.daily,
40
- 'threeTimesADay': FREQ.daily,
41
- 'semidaily': FREQ.daily,
42
- 'fourTimesAWeek': FREQ.threeTimesAWeek,
43
- 'quinquennial': None,
44
- 'unknown': None,
57
+ "punctual": None,
58
+ "hourly": FREQ.continuous,
59
+ "fourTimesADay": FREQ.daily,
60
+ "threeTimesADay": FREQ.daily,
61
+ "semidaily": FREQ.daily,
62
+ "fourTimesAWeek": FREQ.threeTimesAWeek,
63
+ "quinquennial": None,
64
+ "unknown": None,
45
65
  }
46
66
 
47
67
  # Map european frequencies to their closest equivalent
@@ -50,31 +70,32 @@ RDF_FREQUENCIES = {
50
70
  # - https://publications.europa.eu/en/web/eu-vocabularies/at-dataset/-/resource/dataset/frequency # noqa: E501
51
71
  EU_RDF_REQUENCIES = {
52
72
  # Match Dublin Core name
53
- EUFREQ.ANNUAL: 'annual',
54
- EUFREQ.BIENNIAL: 'biennial',
55
- EUFREQ.TRIENNIAL: 'triennial',
56
- EUFREQ.QUARTERLY: 'quarterly',
57
- EUFREQ.MONTHLY: 'monthly',
58
- EUFREQ.BIMONTHLY: 'bimonthly',
59
- EUFREQ.WEEKLY: 'weekly',
60
- EUFREQ.BIWEEKLY: 'biweekly',
61
- EUFREQ.DAILY: 'daily',
73
+ EUFREQ.ANNUAL: "annual",
74
+ EUFREQ.BIENNIAL: "biennial",
75
+ EUFREQ.TRIENNIAL: "triennial",
76
+ EUFREQ.QUARTERLY: "quarterly",
77
+ EUFREQ.MONTHLY: "monthly",
78
+ EUFREQ.BIMONTHLY: "bimonthly",
79
+ EUFREQ.WEEKLY: "weekly",
80
+ EUFREQ.BIWEEKLY: "biweekly",
81
+ EUFREQ.DAILY: "daily",
62
82
  # Name differs from Dublin Core
63
- EUFREQ.ANNUAL_2: 'semiannual',
64
- EUFREQ.ANNUAL_3: 'threeTimesAYear',
65
- EUFREQ.MONTHLY_2: 'semimonthly',
66
- EUFREQ.MONTHLY_3: 'threeTimesAMonth',
67
- EUFREQ.WEEKLY_2: 'semiweekly',
68
- EUFREQ.WEEKLY_3: 'threeTimesAWeek',
69
- EUFREQ.DAILY_2: 'semidaily',
70
- EUFREQ.CONT: 'continuous',
71
- EUFREQ.UPDATE_CONT: 'continuous',
72
- EUFREQ.IRREG: 'irregular',
73
- EUFREQ.UNKNOWN: 'unknown',
74
- EUFREQ.OTHER: 'unknown',
75
- EUFREQ.NEVER: 'punctual',
83
+ EUFREQ.ANNUAL_2: "semiannual",
84
+ EUFREQ.ANNUAL_3: "threeTimesAYear",
85
+ EUFREQ.MONTHLY_2: "semimonthly",
86
+ EUFREQ.MONTHLY_3: "threeTimesAMonth",
87
+ EUFREQ.WEEKLY_2: "semiweekly",
88
+ EUFREQ.WEEKLY_3: "threeTimesAWeek",
89
+ EUFREQ.DAILY_2: "semidaily",
90
+ EUFREQ.CONT: "continuous",
91
+ EUFREQ.UPDATE_CONT: "continuous",
92
+ EUFREQ.IRREG: "irregular",
93
+ EUFREQ.UNKNOWN: "unknown",
94
+ EUFREQ.OTHER: "unknown",
95
+ EUFREQ.NEVER: "punctual",
76
96
  }
77
97
 
98
+
78
99
  def temporal_to_rdf(daterange, graph=None):
79
100
  if not daterange:
80
101
  return
@@ -104,18 +125,25 @@ def owner_to_rdf(dataset, graph=None):
104
125
 
105
126
 
106
127
  def resource_to_rdf(resource, dataset=None, graph=None, is_hvd=False):
107
- '''
128
+ """
108
129
  Map a Resource domain model to a DCAT/RDF graph
109
- '''
130
+ """
110
131
  graph = graph or Graph(namespace_manager=namespace_manager)
111
132
  if dataset and dataset.id:
112
- id = URIRef(endpoint_for('datasets.show_redirect', 'api.dataset', dataset=dataset.id,
113
- _external=True,
114
- _anchor='resource-{0}'.format(resource.id)))
133
+ id = URIRef(
134
+ endpoint_for(
135
+ "datasets.show_redirect",
136
+ "api.dataset",
137
+ dataset=dataset.id,
138
+ _external=True,
139
+ _anchor="resource-{0}".format(resource.id),
140
+ )
141
+ )
115
142
  else:
116
143
  id = BNode(resource.id)
117
- permalink = endpoint_for('datasets.resource', 'api.resource_redirect', id=resource.id,
118
- _external=True)
144
+ permalink = endpoint_for(
145
+ "datasets.resource", "api.resource_redirect", id=resource.id, _external=True
146
+ )
119
147
  r = graph.resource(id)
120
148
  r.set(RDF.type, DCAT.Distribution)
121
149
  r.set(DCT.identifier, Literal(resource.id))
@@ -138,7 +166,7 @@ def resource_to_rdf(resource, dataset=None, graph=None, is_hvd=False):
138
166
  if resource.checksum:
139
167
  checksum = graph.resource(BNode())
140
168
  checksum.set(RDF.type, SPDX.Checksum)
141
- algorithm = 'checksumAlgorithm_{0}'.format(resource.checksum.type)
169
+ algorithm = "checksumAlgorithm_{0}".format(resource.checksum.type)
142
170
  checksum.add(SPDX.algorithm, getattr(SPDX, algorithm))
143
171
  checksum.add(SPDX.checksumValue, Literal(resource.checksum.value))
144
172
  r.add(SPDX.checksum, checksum)
@@ -148,21 +176,25 @@ def resource_to_rdf(resource, dataset=None, graph=None, is_hvd=False):
148
176
  return r
149
177
 
150
178
 
151
- def dataset_to_graph_id(dataset: Dataset) -> URIRef | BNode:
179
+ def dataset_to_graph_id(dataset: Dataset) -> URIRef | BNode:
152
180
  if dataset.harvest and dataset.harvest.uri:
153
181
  return URIRef(dataset.harvest.uri)
154
182
  elif dataset.id:
155
- return URIRef(endpoint_for('datasets.show_redirect', 'api.dataset',
156
- dataset=dataset.id, _external=True))
183
+ return URIRef(
184
+ endpoint_for(
185
+ "datasets.show_redirect", "api.dataset", dataset=dataset.id, _external=True
186
+ )
187
+ )
157
188
  else:
158
189
  # Should not happen in production. Some test only
159
190
  # `build()` a dataset without saving it to the DB.
160
191
  return BNode()
161
192
 
193
+
162
194
  def dataset_to_rdf(dataset, graph=None):
163
- '''
195
+ """
164
196
  Map a dataset domain model to a DCAT/RDF graph
165
- '''
197
+ """
166
198
  # Use the unlocalized permalink to the dataset as URI when available
167
199
  # unless there is already an upstream URI
168
200
  id = dataset_to_graph_id(dataset)
@@ -187,7 +219,7 @@ def dataset_to_rdf(dataset, graph=None):
187
219
 
188
220
  # Add DCAT-AP HVD properties if the dataset is tagged hvd.
189
221
  # See https://semiceu.github.io/DCAT-AP/releases/2.2.0-hvd/
190
- is_hvd = current_app.config['HVD_SUPPORT'] and 'hvd' in dataset.tags
222
+ is_hvd = current_app.config["HVD_SUPPORT"] and "hvd" in dataset.tags
191
223
  if is_hvd:
192
224
  d.add(DCATAP.applicableLegislation, URIRef(HVD_LEGISLATION))
193
225
 
@@ -215,52 +247,46 @@ def dataset_to_rdf(dataset, graph=None):
215
247
 
216
248
 
217
249
  CHECKSUM_ALGORITHMS = {
218
- SPDX.checksumAlgorithm_md5: 'md5',
219
- SPDX.checksumAlgorithm_sha1: 'sha1',
220
- SPDX.checksumAlgorithm_sha256: 'sha256',
250
+ SPDX.checksumAlgorithm_md5: "md5",
251
+ SPDX.checksumAlgorithm_sha1: "sha1",
252
+ SPDX.checksumAlgorithm_sha256: "sha256",
221
253
  }
222
254
 
223
255
 
224
256
  def temporal_from_literal(text):
225
- '''
257
+ """
226
258
  Parse a temporal coverage from a literal ie. either:
227
259
  - an ISO date range
228
260
  - a single ISO date period (month,year)
229
- '''
230
- if text.count('/') == 1:
261
+ """
262
+ if text.count("/") == 1:
231
263
  # This is an ISO date range as preconized by Gov.uk
232
264
  # http://guidance.data.gov.uk/dcat_fields.html
233
- start, end = text.split('/')
234
- return db.DateRange(
235
- start=parse_dt(start).date(),
236
- end=parse_dt(end).date()
237
- )
265
+ start, end = text.split("/")
266
+ return db.DateRange(start=parse_dt(start).date(), end=parse_dt(end).date())
238
267
  else:
239
- separators = text.count('-')
268
+ separators = text.count("-")
240
269
  if separators == 0:
241
270
  # this is a year
242
- return db.DateRange(
243
- start=date(int(text), 1, 1),
244
- end=date(int(text), 12, 31)
245
- )
271
+ return db.DateRange(start=date(int(text), 1, 1), end=date(int(text), 12, 31))
246
272
  elif separators == 1:
247
273
  # this is a month
248
274
  dt = parse_dt(text).date()
249
275
  return db.DateRange(
250
276
  start=dt.replace(day=1),
251
- end=dt.replace(day=calendar.monthrange(dt.year, dt.month)[1])
277
+ end=dt.replace(day=calendar.monthrange(dt.year, dt.month)[1]),
252
278
  )
253
279
 
254
280
 
255
281
  def temporal_from_resource(resource):
256
- '''
282
+ """
257
283
  Parse a temporal coverage from a RDF class/resource ie. either:
258
284
  - a `dct:PeriodOfTime` with schema.org `startDate` and `endDate` properties
259
285
  - a `dct:PeriodOfTime` with DCAT `startDate` and `endDate` properties
260
286
  - an inline gov.uk Time Interval value
261
287
  - a URI reference to a gov.uk Time Interval ontology
262
288
  http://reference.data.gov.uk/
263
- '''
289
+ """
264
290
  if isinstance(resource.identifier, URIRef):
265
291
  # Fetch remote ontology if necessary
266
292
  g = Graph().parse(str(resource.identifier))
@@ -268,22 +294,21 @@ def temporal_from_resource(resource):
268
294
  if resource.value(SCHEMA.startDate):
269
295
  return db.DateRange(
270
296
  start=resource.value(SCHEMA.startDate).toPython(),
271
- end=resource.value(SCHEMA.endDate).toPython()
297
+ end=resource.value(SCHEMA.endDate).toPython(),
272
298
  )
273
299
  elif resource.value(DCAT.startDate):
274
300
  return db.DateRange(
275
301
  start=resource.value(DCAT.startDate).toPython(),
276
- end=resource.value(DCAT.endDate).toPython()
302
+ end=resource.value(DCAT.endDate).toPython(),
277
303
  )
278
304
  elif resource.value(SCV.min):
279
305
  return db.DateRange(
280
- start=resource.value(SCV.min).toPython(),
281
- end=resource.value(SCV.max).toPython()
306
+ start=resource.value(SCV.min).toPython(), end=resource.value(SCV.max).toPython()
282
307
  )
283
308
 
284
309
 
285
310
  def temporal_from_rdf(period_of_time):
286
- '''Failsafe parsing of a temporal coverage'''
311
+ """Failsafe parsing of a temporal coverage"""
287
312
  try:
288
313
  if isinstance(period_of_time, Literal):
289
314
  return temporal_from_literal(str(period_of_time))
@@ -293,30 +318,34 @@ def temporal_from_rdf(period_of_time):
293
318
  # There are a lot of cases where parsing could/should fail
294
319
  # but we never want to break the whole dataset parsing
295
320
  # so we log the error for future investigation and improvement
296
- log.warning('Unable to parse temporal coverage', exc_info=True)
321
+ log.warning("Unable to parse temporal coverage", exc_info=True)
322
+
297
323
 
298
324
  def spatial_from_rdf(graph):
299
325
  geojsons = []
300
326
  for term in graph.objects(DCT.spatial):
301
327
  try:
302
- # This may not be official in the norm but some ArcGis return
328
+ # This may not be official in the norm but some ArcGis return
303
329
  # bbox as literal directly in DCT.spatial.
304
330
  if isinstance(term, Literal):
305
331
  geojson = bbox_to_geojson_multipolygon(term.toPython())
306
332
  if geojson is not None:
307
333
  geojsons.append(geojson)
308
-
334
+
309
335
  continue
310
336
 
311
337
  for object in term.objects():
312
338
  if isinstance(object, Literal):
313
- if object.datatype.__str__() == 'https://www.iana.org/assignments/media-types/application/vnd.geo+json':
339
+ if (
340
+ object.datatype.__str__()
341
+ == "https://www.iana.org/assignments/media-types/application/vnd.geo+json"
342
+ ):
314
343
  try:
315
344
  geojson = json.loads(object.toPython())
316
345
  except ValueError as e:
317
346
  log.warning(f"Invalid JSON in spatial GeoJSON {object.toPython()} {e}")
318
347
  continue
319
- elif object.datatype.__str__() == 'http://www.opengis.net/rdf#wktLiteral':
348
+ elif object.datatype.__str__() == "http://www.opengis.net/rdf#wktLiteral":
320
349
  try:
321
350
  # .upper() si here because geomet doesn't support Polygon but only POLYGON
322
351
  geojson = wkt.loads(object.toPython().strip().upper())
@@ -328,7 +357,9 @@ def spatial_from_rdf(graph):
328
357
 
329
358
  geojsons.append(geojson)
330
359
  except Exception as e:
331
- log.exception(f"Exception during `spatial_from_rdf` for term {term}: {e}", stack_info=True)
360
+ log.exception(
361
+ f"Exception during `spatial_from_rdf` for term {term}: {e}", stack_info=True
362
+ )
332
363
 
333
364
  if not geojsons:
334
365
  return None
@@ -339,16 +370,16 @@ def spatial_from_rdf(graph):
339
370
  # if there are other types of spatial coverage worth integrating (points? line strings?). But these other
340
371
  # formats are not compatible to be merged in the unique stored representation in MongoDB, we'll deal with them in a second pass.
341
372
  # The merging lose the properties and other information inside the GeoJSON…
342
- # Note that having multiple `Polygon` is not really the DCAT way of doing things, the standard require that you use
373
+ # Note that having multiple `Polygon` is not really the DCAT way of doing things, the standard require that you use
343
374
  # a `MultiPolygon` in this case. We support this right now, and wait and see if it raises problems in the future for
344
375
  # people following the standard. (see https://github.com/datagouv/data.gouv.fr/issues/1362#issuecomment-2112774115)
345
376
  polygons = []
346
377
  for geojson in geojsons:
347
- if geojson['type'] == 'Polygon':
348
- if geojson['coordinates'] not in polygons:
349
- polygons.append(geojson['coordinates'])
350
- elif geojson['type'] == 'MultiPolygon':
351
- for coordinates in geojson['coordinates']:
378
+ if geojson["type"] == "Polygon":
379
+ if geojson["coordinates"] not in polygons:
380
+ polygons.append(geojson["coordinates"])
381
+ elif geojson["type"] == "MultiPolygon":
382
+ for coordinates in geojson["coordinates"]:
352
383
  if coordinates not in polygons:
353
384
  polygons.append(coordinates)
354
385
  else:
@@ -359,10 +390,12 @@ def spatial_from_rdf(graph):
359
390
  log.warning(f"No supported types found in the GeoJSON data.")
360
391
  return None
361
392
 
362
- spatial_coverage = SpatialCoverage(geom={
363
- 'type': 'MultiPolygon',
364
- 'coordinates': polygons,
365
- })
393
+ spatial_coverage = SpatialCoverage(
394
+ geom={
395
+ "type": "MultiPolygon",
396
+ "coordinates": polygons,
397
+ }
398
+ )
366
399
 
367
400
  try:
368
401
  spatial_coverage.clean()
@@ -397,7 +430,7 @@ def mime_from_rdf(resource):
397
430
  if not mime:
398
431
  return
399
432
  if IANAFORMAT in mime:
400
- return '/'.join(mime.split('/')[-2:])
433
+ return "/".join(mime.split("/")[-2:])
401
434
  if isinstance(mime, str):
402
435
  return mime
403
436
 
@@ -413,36 +446,36 @@ def format_from_rdf(resource):
413
446
 
414
447
 
415
448
  def title_from_rdf(rdf, url):
416
- '''
449
+ """
417
450
  Try to extract a distribution title from a property.
418
451
  As it's not a mandatory property,
419
452
  it fallback on building a title from the URL
420
453
  then the format and in last ressort a generic resource name.
421
- '''
454
+ """
422
455
  title = rdf_value(rdf, DCT.title)
423
456
  if title:
424
457
  return title
425
458
  if url:
426
- last_part = url.split('/')[-1]
427
- if '.' in last_part and '?' not in last_part:
459
+ last_part = url.split("/")[-1]
460
+ if "." in last_part and "?" not in last_part:
428
461
  return last_part
429
462
  fmt = rdf_value(rdf, DCT.format)
430
- lang = current_app.config['DEFAULT_LANGUAGE']
463
+ lang = current_app.config["DEFAULT_LANGUAGE"]
431
464
  with i18n.language(lang):
432
465
  if fmt:
433
- return i18n._('{format} resource').format(format=fmt.lower())
466
+ return i18n._("{format} resource").format(format=fmt.lower())
434
467
  else:
435
- return i18n._('Nameless resource')
468
+ return i18n._("Nameless resource")
469
+
436
470
 
437
471
  def resource_from_rdf(graph_or_distrib, dataset=None, is_additionnal=False):
438
- '''
472
+ """
439
473
  Map a Resource domain model to a DCAT/RDF graph
440
- '''
474
+ """
441
475
  if isinstance(graph_or_distrib, RdfResource):
442
476
  distrib = graph_or_distrib
443
477
  else:
444
- node = graph_or_distrib.value(predicate=RDF.type,
445
- object=DCAT.Distribution)
478
+ node = graph_or_distrib.value(predicate=RDF.type, object=DCAT.Distribution)
446
479
  distrib = graph_or_distrib.resource(node)
447
480
 
448
481
  if not is_additionnal:
@@ -453,16 +486,16 @@ def resource_from_rdf(graph_or_distrib, dataset=None, is_additionnal=False):
453
486
  url = distrib.identifier.toPython() if isinstance(distrib.identifier, URIRef) else None
454
487
  # we shouldn't create resources without URLs
455
488
  if not url:
456
- log.warning(f'Resource without url: {distrib}')
489
+ log.warning(f"Resource without url: {distrib}")
457
490
  return
458
491
 
459
492
  if dataset:
460
- resource = get_by(dataset.resources, 'url', url)
493
+ resource = get_by(dataset.resources, "url", url)
461
494
  if not dataset or not resource:
462
495
  resource = Resource()
463
496
  if dataset:
464
497
  dataset.resources.append(resource)
465
- resource.filetype = 'remote'
498
+ resource.filetype = "remote"
466
499
  resource.title = title_from_rdf(distrib, url)
467
500
  resource.url = url
468
501
  resource.description = sanitize_html(distrib.value(DCT.description))
@@ -482,7 +515,7 @@ def resource_from_rdf(graph_or_distrib, dataset=None, is_additionnal=False):
482
515
  resource.checksum.value = rdf_value(checksum, SPDX.checksumValue)
483
516
  resource.checksum.type = algorithm
484
517
  if is_additionnal:
485
- resource.type = 'other'
518
+ resource.type = "other"
486
519
 
487
520
  identifier = rdf_value(distrib, DCT.identifier)
488
521
  uri = distrib.identifier.toPython() if isinstance(distrib.identifier, URIRef) else None
@@ -500,9 +533,9 @@ def resource_from_rdf(graph_or_distrib, dataset=None, is_additionnal=False):
500
533
 
501
534
 
502
535
  def dataset_from_rdf(graph: Graph, dataset=None, node=None):
503
- '''
536
+ """
504
537
  Create or update a dataset from a RDF/DCAT graph
505
- '''
538
+ """
506
539
  dataset = dataset or Dataset()
507
540
 
508
541
  if node is None: # Assume first match is the only match
@@ -542,13 +575,13 @@ def dataset_from_rdf(graph: Graph, dataset=None, node=None):
542
575
  if access_rights:
543
576
  dataset.extras["harvest"] = {
544
577
  "dct:accessRights": access_rights,
545
- **dataset.extras.get("harvest", {})
578
+ **dataset.extras.get("harvest", {}),
546
579
  }
547
580
  provenance = [p.value(RDFS.label) for p in d.objects(DCT.provenance)]
548
581
  if provenance:
549
582
  dataset.extras["harvest"] = {
550
583
  "dct:provenance": provenance,
551
- **dataset.extras.get("harvest", {})
584
+ **dataset.extras.get("harvest", {}),
552
585
  }
553
586
 
554
587
  licenses = set()
@@ -584,11 +617,12 @@ def dataset_from_rdf(graph: Graph, dataset=None, node=None):
584
617
 
585
618
  return dataset
586
619
 
587
- def bbox_to_geojson_multipolygon(bbox_as_str: str) -> dict | None:
588
- bbox = bbox_as_str.strip().split(',')
620
+
621
+ def bbox_to_geojson_multipolygon(bbox_as_str: str) -> dict | None:
622
+ bbox = bbox_as_str.strip().split(",")
589
623
  if len(bbox) != 4:
590
624
  return None
591
-
625
+
592
626
  west = float(bbox[0])
593
627
  south = float(bbox[1])
594
628
  east = float(bbox[2])
@@ -600,10 +634,10 @@ def bbox_to_geojson_multipolygon(bbox_as_str: str) -> dict | None:
600
634
  low_right = [east, south]
601
635
 
602
636
  return {
603
- 'type': 'MultiPolygon',
604
- 'coordinates': [
637
+ "type": "MultiPolygon",
638
+ "coordinates": [
605
639
  [
606
640
  [low_left, low_right, top_right, top_left, low_left],
607
- ],
641
+ ],
608
642
  ],
609
- }
643
+ }