udata 9.1.2.dev30355-py2.py3-none-any.whl → 9.1.2.dev30454-py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of udata might be problematic.

Files changed (413)
  1. tasks/__init__.py +109 -107
  2. tasks/helpers.py +18 -18
  3. udata/__init__.py +4 -4
  4. udata/admin/views.py +5 -5
  5. udata/api/__init__.py +111 -134
  6. udata/api/commands.py +45 -37
  7. udata/api/errors.py +5 -4
  8. udata/api/fields.py +23 -21
  9. udata/api/oauth2.py +55 -74
  10. udata/api/parsers.py +15 -15
  11. udata/api/signals.py +1 -1
  12. udata/api_fields.py +137 -89
  13. udata/app.py +58 -55
  14. udata/assets.py +5 -5
  15. udata/auth/__init__.py +37 -26
  16. udata/auth/forms.py +23 -15
  17. udata/auth/helpers.py +1 -1
  18. udata/auth/mails.py +3 -3
  19. udata/auth/password_validation.py +19 -15
  20. udata/auth/views.py +94 -68
  21. udata/commands/__init__.py +71 -69
  22. udata/commands/cache.py +7 -7
  23. udata/commands/db.py +201 -140
  24. udata/commands/dcat.py +36 -30
  25. udata/commands/fixtures.py +100 -84
  26. udata/commands/images.py +21 -20
  27. udata/commands/info.py +17 -20
  28. udata/commands/init.py +10 -10
  29. udata/commands/purge.py +12 -13
  30. udata/commands/serve.py +41 -29
  31. udata/commands/static.py +16 -18
  32. udata/commands/test.py +20 -20
  33. udata/commands/tests/fixtures.py +26 -24
  34. udata/commands/worker.py +31 -33
  35. udata/core/__init__.py +12 -12
  36. udata/core/activity/__init__.py +0 -1
  37. udata/core/activity/api.py +59 -49
  38. udata/core/activity/models.py +28 -26
  39. udata/core/activity/signals.py +1 -1
  40. udata/core/activity/tasks.py +16 -10
  41. udata/core/badges/api.py +6 -6
  42. udata/core/badges/commands.py +14 -13
  43. udata/core/badges/fields.py +8 -5
  44. udata/core/badges/forms.py +7 -4
  45. udata/core/badges/models.py +16 -31
  46. udata/core/badges/permissions.py +1 -3
  47. udata/core/badges/signals.py +2 -2
  48. udata/core/badges/tasks.py +3 -2
  49. udata/core/badges/tests/test_commands.py +10 -10
  50. udata/core/badges/tests/test_model.py +24 -31
  51. udata/core/contact_point/api.py +19 -18
  52. udata/core/contact_point/api_fields.py +21 -14
  53. udata/core/contact_point/factories.py +2 -2
  54. udata/core/contact_point/forms.py +7 -6
  55. udata/core/contact_point/models.py +3 -5
  56. udata/core/dataservices/api.py +26 -21
  57. udata/core/dataservices/factories.py +13 -11
  58. udata/core/dataservices/models.py +35 -40
  59. udata/core/dataservices/permissions.py +4 -4
  60. udata/core/dataservices/rdf.py +40 -17
  61. udata/core/dataservices/tasks.py +4 -3
  62. udata/core/dataset/actions.py +10 -10
  63. udata/core/dataset/activities.py +21 -23
  64. udata/core/dataset/api.py +321 -298
  65. udata/core/dataset/api_fields.py +443 -271
  66. udata/core/dataset/apiv2.py +305 -229
  67. udata/core/dataset/commands.py +38 -36
  68. udata/core/dataset/constants.py +61 -54
  69. udata/core/dataset/csv.py +70 -74
  70. udata/core/dataset/events.py +39 -32
  71. udata/core/dataset/exceptions.py +8 -4
  72. udata/core/dataset/factories.py +57 -65
  73. udata/core/dataset/forms.py +87 -63
  74. udata/core/dataset/models.py +336 -280
  75. udata/core/dataset/permissions.py +9 -6
  76. udata/core/dataset/preview.py +15 -17
  77. udata/core/dataset/rdf.py +156 -122
  78. udata/core/dataset/search.py +92 -77
  79. udata/core/dataset/signals.py +1 -1
  80. udata/core/dataset/tasks.py +63 -54
  81. udata/core/discussions/actions.py +5 -5
  82. udata/core/discussions/api.py +124 -120
  83. udata/core/discussions/factories.py +2 -2
  84. udata/core/discussions/forms.py +9 -7
  85. udata/core/discussions/metrics.py +1 -3
  86. udata/core/discussions/models.py +25 -24
  87. udata/core/discussions/notifications.py +18 -14
  88. udata/core/discussions/permissions.py +3 -3
  89. udata/core/discussions/signals.py +4 -4
  90. udata/core/discussions/tasks.py +24 -28
  91. udata/core/followers/api.py +32 -33
  92. udata/core/followers/models.py +9 -9
  93. udata/core/followers/signals.py +3 -3
  94. udata/core/jobs/actions.py +7 -7
  95. udata/core/jobs/api.py +99 -92
  96. udata/core/jobs/commands.py +48 -49
  97. udata/core/jobs/forms.py +11 -11
  98. udata/core/jobs/models.py +6 -6
  99. udata/core/metrics/__init__.py +2 -2
  100. udata/core/metrics/commands.py +34 -30
  101. udata/core/metrics/models.py +2 -4
  102. udata/core/metrics/signals.py +1 -1
  103. udata/core/metrics/tasks.py +3 -3
  104. udata/core/organization/activities.py +12 -15
  105. udata/core/organization/api.py +167 -174
  106. udata/core/organization/api_fields.py +183 -124
  107. udata/core/organization/apiv2.py +32 -32
  108. udata/core/organization/commands.py +20 -22
  109. udata/core/organization/constants.py +11 -11
  110. udata/core/organization/csv.py +17 -15
  111. udata/core/organization/factories.py +8 -11
  112. udata/core/organization/forms.py +32 -26
  113. udata/core/organization/metrics.py +2 -1
  114. udata/core/organization/models.py +87 -67
  115. udata/core/organization/notifications.py +18 -14
  116. udata/core/organization/permissions.py +10 -11
  117. udata/core/organization/rdf.py +14 -14
  118. udata/core/organization/search.py +30 -28
  119. udata/core/organization/signals.py +7 -7
  120. udata/core/organization/tasks.py +42 -61
  121. udata/core/owned.py +38 -27
  122. udata/core/post/api.py +82 -81
  123. udata/core/post/constants.py +8 -5
  124. udata/core/post/factories.py +4 -4
  125. udata/core/post/forms.py +13 -14
  126. udata/core/post/models.py +20 -22
  127. udata/core/post/tests/test_api.py +30 -32
  128. udata/core/reports/api.py +8 -7
  129. udata/core/reports/constants.py +1 -3
  130. udata/core/reports/models.py +10 -10
  131. udata/core/reuse/activities.py +15 -19
  132. udata/core/reuse/api.py +123 -126
  133. udata/core/reuse/api_fields.py +120 -85
  134. udata/core/reuse/apiv2.py +11 -10
  135. udata/core/reuse/constants.py +23 -23
  136. udata/core/reuse/csv.py +18 -18
  137. udata/core/reuse/factories.py +5 -9
  138. udata/core/reuse/forms.py +24 -21
  139. udata/core/reuse/models.py +55 -51
  140. udata/core/reuse/permissions.py +2 -2
  141. udata/core/reuse/search.py +49 -46
  142. udata/core/reuse/signals.py +1 -1
  143. udata/core/reuse/tasks.py +4 -5
  144. udata/core/site/api.py +47 -50
  145. udata/core/site/factories.py +2 -2
  146. udata/core/site/forms.py +4 -5
  147. udata/core/site/models.py +94 -63
  148. udata/core/site/rdf.py +14 -14
  149. udata/core/spam/api.py +16 -9
  150. udata/core/spam/constants.py +4 -4
  151. udata/core/spam/fields.py +13 -7
  152. udata/core/spam/models.py +27 -20
  153. udata/core/spam/signals.py +1 -1
  154. udata/core/spam/tests/test_spam.py +6 -5
  155. udata/core/spatial/api.py +72 -80
  156. udata/core/spatial/api_fields.py +73 -58
  157. udata/core/spatial/commands.py +67 -64
  158. udata/core/spatial/constants.py +3 -3
  159. udata/core/spatial/factories.py +37 -54
  160. udata/core/spatial/forms.py +27 -26
  161. udata/core/spatial/geoids.py +17 -17
  162. udata/core/spatial/models.py +43 -47
  163. udata/core/spatial/tasks.py +2 -1
  164. udata/core/spatial/tests/test_api.py +115 -130
  165. udata/core/spatial/tests/test_fields.py +74 -77
  166. udata/core/spatial/tests/test_geoid.py +22 -22
  167. udata/core/spatial/tests/test_models.py +5 -7
  168. udata/core/spatial/translations.py +16 -16
  169. udata/core/storages/__init__.py +16 -18
  170. udata/core/storages/api.py +66 -64
  171. udata/core/storages/tasks.py +7 -7
  172. udata/core/storages/utils.py +15 -15
  173. udata/core/storages/views.py +5 -6
  174. udata/core/tags/api.py +17 -14
  175. udata/core/tags/csv.py +4 -4
  176. udata/core/tags/models.py +8 -5
  177. udata/core/tags/tasks.py +11 -13
  178. udata/core/tags/views.py +4 -4
  179. udata/core/topic/api.py +84 -73
  180. udata/core/topic/apiv2.py +157 -127
  181. udata/core/topic/factories.py +3 -4
  182. udata/core/topic/forms.py +12 -14
  183. udata/core/topic/models.py +14 -19
  184. udata/core/topic/parsers.py +26 -26
  185. udata/core/user/activities.py +30 -29
  186. udata/core/user/api.py +151 -152
  187. udata/core/user/api_fields.py +132 -100
  188. udata/core/user/apiv2.py +7 -7
  189. udata/core/user/commands.py +38 -38
  190. udata/core/user/factories.py +8 -9
  191. udata/core/user/forms.py +14 -11
  192. udata/core/user/metrics.py +2 -2
  193. udata/core/user/models.py +68 -69
  194. udata/core/user/permissions.py +4 -5
  195. udata/core/user/rdf.py +7 -8
  196. udata/core/user/tasks.py +2 -2
  197. udata/core/user/tests/test_user_model.py +24 -16
  198. udata/cors.py +99 -0
  199. udata/db/tasks.py +2 -1
  200. udata/entrypoints.py +35 -31
  201. udata/errors.py +2 -1
  202. udata/event/values.py +6 -6
  203. udata/factories.py +2 -2
  204. udata/features/identicon/api.py +5 -6
  205. udata/features/identicon/backends.py +48 -55
  206. udata/features/identicon/tests/test_backends.py +4 -5
  207. udata/features/notifications/__init__.py +0 -1
  208. udata/features/notifications/actions.py +9 -9
  209. udata/features/notifications/api.py +17 -13
  210. udata/features/territories/__init__.py +12 -10
  211. udata/features/territories/api.py +14 -15
  212. udata/features/territories/models.py +23 -28
  213. udata/features/transfer/actions.py +8 -11
  214. udata/features/transfer/api.py +84 -77
  215. udata/features/transfer/factories.py +2 -1
  216. udata/features/transfer/models.py +11 -12
  217. udata/features/transfer/notifications.py +19 -15
  218. udata/features/transfer/permissions.py +5 -5
  219. udata/forms/__init__.py +5 -2
  220. udata/forms/fields.py +164 -172
  221. udata/forms/validators.py +19 -22
  222. udata/forms/widgets.py +9 -13
  223. udata/frontend/__init__.py +31 -26
  224. udata/frontend/csv.py +68 -58
  225. udata/frontend/markdown.py +40 -44
  226. udata/harvest/actions.py +89 -77
  227. udata/harvest/api.py +294 -238
  228. udata/harvest/backends/__init__.py +4 -4
  229. udata/harvest/backends/base.py +128 -111
  230. udata/harvest/backends/dcat.py +80 -66
  231. udata/harvest/commands.py +56 -60
  232. udata/harvest/csv.py +8 -8
  233. udata/harvest/exceptions.py +6 -3
  234. udata/harvest/filters.py +24 -23
  235. udata/harvest/forms.py +27 -28
  236. udata/harvest/models.py +88 -80
  237. udata/harvest/notifications.py +15 -10
  238. udata/harvest/signals.py +13 -13
  239. udata/harvest/tasks.py +11 -10
  240. udata/harvest/tests/factories.py +23 -24
  241. udata/harvest/tests/test_actions.py +136 -166
  242. udata/harvest/tests/test_api.py +220 -214
  243. udata/harvest/tests/test_base_backend.py +117 -112
  244. udata/harvest/tests/test_dcat_backend.py +380 -308
  245. udata/harvest/tests/test_filters.py +33 -22
  246. udata/harvest/tests/test_models.py +11 -14
  247. udata/harvest/tests/test_notifications.py +6 -7
  248. udata/harvest/tests/test_tasks.py +7 -6
  249. udata/i18n.py +237 -78
  250. udata/linkchecker/backends.py +5 -11
  251. udata/linkchecker/checker.py +23 -22
  252. udata/linkchecker/commands.py +4 -6
  253. udata/linkchecker/models.py +6 -6
  254. udata/linkchecker/tasks.py +18 -20
  255. udata/mail.py +21 -21
  256. udata/migrations/2020-07-24-remove-s-from-scope-oauth.py +9 -8
  257. udata/migrations/2020-08-24-add-fs-filename.py +9 -8
  258. udata/migrations/2020-09-28-update-reuses-datasets-metrics.py +5 -4
  259. udata/migrations/2020-10-16-migrate-ods-resources.py +9 -10
  260. udata/migrations/2021-04-08-update-schema-with-new-structure.py +8 -7
  261. udata/migrations/2021-05-27-fix-default-schema-name.py +7 -6
  262. udata/migrations/2021-07-05-remove-unused-badges.py +17 -15
  263. udata/migrations/2021-07-07-update-schema-for-community-resources.py +7 -6
  264. udata/migrations/2021-08-17-follow-integrity.py +5 -4
  265. udata/migrations/2021-08-17-harvest-integrity.py +13 -12
  266. udata/migrations/2021-08-17-oauth2client-integrity.py +5 -4
  267. udata/migrations/2021-08-17-transfer-integrity.py +5 -4
  268. udata/migrations/2021-08-17-users-integrity.py +9 -8
  269. udata/migrations/2021-12-14-reuse-topics.py +7 -6
  270. udata/migrations/2022-04-21-improve-extension-detection.py +8 -7
  271. udata/migrations/2022-09-22-clean-inactive-harvest-datasets.py +16 -14
  272. udata/migrations/2022-10-10-add-fs_uniquifier-to-user-model.py +6 -6
  273. udata/migrations/2022-10-10-migrate-harvest-extras.py +36 -26
  274. udata/migrations/2023-02-08-rename-internal-dates.py +46 -28
  275. udata/migrations/2024-01-29-fix-reuse-and-dataset-with-private-None.py +10 -8
  276. udata/migrations/2024-03-22-migrate-activity-kwargs-to-extras.py +6 -4
  277. udata/migrations/2024-06-11-fix-reuse-datasets-references.py +7 -6
  278. udata/migrations/__init__.py +123 -105
  279. udata/models/__init__.py +4 -4
  280. udata/mongo/__init__.py +13 -11
  281. udata/mongo/badges_field.py +3 -2
  282. udata/mongo/datetime_fields.py +13 -12
  283. udata/mongo/document.py +17 -16
  284. udata/mongo/engine.py +15 -16
  285. udata/mongo/errors.py +2 -1
  286. udata/mongo/extras_fields.py +30 -20
  287. udata/mongo/queryset.py +12 -12
  288. udata/mongo/slug_fields.py +38 -28
  289. udata/mongo/taglist_field.py +1 -2
  290. udata/mongo/url_field.py +5 -5
  291. udata/mongo/uuid_fields.py +4 -3
  292. udata/notifications/__init__.py +1 -1
  293. udata/notifications/mattermost.py +10 -9
  294. udata/rdf.py +167 -188
  295. udata/routing.py +40 -45
  296. udata/search/__init__.py +18 -19
  297. udata/search/adapter.py +17 -16
  298. udata/search/commands.py +44 -51
  299. udata/search/fields.py +13 -20
  300. udata/search/query.py +23 -18
  301. udata/search/result.py +9 -10
  302. udata/sentry.py +21 -19
  303. udata/settings.py +262 -198
  304. udata/sitemap.py +8 -6
  305. udata/storage/s3.py +20 -13
  306. udata/tags.py +4 -5
  307. udata/tasks.py +43 -42
  308. udata/tests/__init__.py +9 -6
  309. udata/tests/api/__init__.py +8 -6
  310. udata/tests/api/test_auth_api.py +395 -321
  311. udata/tests/api/test_base_api.py +33 -35
  312. udata/tests/api/test_contact_points.py +7 -9
  313. udata/tests/api/test_dataservices_api.py +211 -158
  314. udata/tests/api/test_datasets_api.py +823 -812
  315. udata/tests/api/test_follow_api.py +13 -15
  316. udata/tests/api/test_me_api.py +95 -112
  317. udata/tests/api/test_organizations_api.py +301 -339
  318. udata/tests/api/test_reports_api.py +35 -25
  319. udata/tests/api/test_reuses_api.py +134 -139
  320. udata/tests/api/test_swagger.py +5 -5
  321. udata/tests/api/test_tags_api.py +18 -25
  322. udata/tests/api/test_topics_api.py +94 -94
  323. udata/tests/api/test_transfer_api.py +53 -48
  324. udata/tests/api/test_user_api.py +128 -141
  325. udata/tests/apiv2/test_datasets.py +290 -198
  326. udata/tests/apiv2/test_me_api.py +10 -11
  327. udata/tests/apiv2/test_organizations.py +56 -74
  328. udata/tests/apiv2/test_swagger.py +5 -5
  329. udata/tests/apiv2/test_topics.py +69 -87
  330. udata/tests/cli/test_cli_base.py +8 -8
  331. udata/tests/cli/test_db_cli.py +21 -19
  332. udata/tests/dataservice/test_dataservice_tasks.py +8 -12
  333. udata/tests/dataset/test_csv_adapter.py +44 -35
  334. udata/tests/dataset/test_dataset_actions.py +2 -3
  335. udata/tests/dataset/test_dataset_commands.py +7 -8
  336. udata/tests/dataset/test_dataset_events.py +36 -29
  337. udata/tests/dataset/test_dataset_model.py +224 -217
  338. udata/tests/dataset/test_dataset_rdf.py +142 -131
  339. udata/tests/dataset/test_dataset_tasks.py +15 -15
  340. udata/tests/dataset/test_resource_preview.py +10 -13
  341. udata/tests/features/territories/__init__.py +9 -13
  342. udata/tests/features/territories/test_territories_api.py +71 -91
  343. udata/tests/forms/test_basic_fields.py +7 -7
  344. udata/tests/forms/test_current_user_field.py +39 -66
  345. udata/tests/forms/test_daterange_field.py +31 -39
  346. udata/tests/forms/test_dict_field.py +28 -26
  347. udata/tests/forms/test_extras_fields.py +102 -76
  348. udata/tests/forms/test_form_field.py +8 -8
  349. udata/tests/forms/test_image_field.py +33 -26
  350. udata/tests/forms/test_model_field.py +134 -123
  351. udata/tests/forms/test_model_list_field.py +7 -7
  352. udata/tests/forms/test_nested_model_list_field.py +117 -79
  353. udata/tests/forms/test_publish_as_field.py +36 -65
  354. udata/tests/forms/test_reference_field.py +34 -53
  355. udata/tests/forms/test_user_forms.py +23 -21
  356. udata/tests/forms/test_uuid_field.py +6 -10
  357. udata/tests/frontend/__init__.py +9 -6
  358. udata/tests/frontend/test_auth.py +7 -6
  359. udata/tests/frontend/test_csv.py +81 -96
  360. udata/tests/frontend/test_hooks.py +43 -43
  361. udata/tests/frontend/test_markdown.py +211 -191
  362. udata/tests/helpers.py +32 -37
  363. udata/tests/models.py +2 -2
  364. udata/tests/organization/test_csv_adapter.py +21 -16
  365. udata/tests/organization/test_notifications.py +11 -18
  366. udata/tests/organization/test_organization_model.py +13 -13
  367. udata/tests/organization/test_organization_rdf.py +29 -22
  368. udata/tests/organization/test_organization_tasks.py +16 -17
  369. udata/tests/plugin.py +79 -73
  370. udata/tests/reuse/test_reuse_model.py +21 -21
  371. udata/tests/reuse/test_reuse_task.py +11 -13
  372. udata/tests/search/__init__.py +11 -12
  373. udata/tests/search/test_adapter.py +60 -70
  374. udata/tests/search/test_query.py +16 -16
  375. udata/tests/search/test_results.py +10 -7
  376. udata/tests/site/test_site_api.py +11 -16
  377. udata/tests/site/test_site_metrics.py +20 -30
  378. udata/tests/site/test_site_model.py +4 -5
  379. udata/tests/site/test_site_rdf.py +94 -78
  380. udata/tests/test_activity.py +17 -17
  381. udata/tests/test_cors.py +62 -0
  382. udata/tests/test_discussions.py +292 -299
  383. udata/tests/test_i18n.py +37 -40
  384. udata/tests/test_linkchecker.py +91 -85
  385. udata/tests/test_mail.py +13 -17
  386. udata/tests/test_migrations.py +219 -180
  387. udata/tests/test_model.py +164 -157
  388. udata/tests/test_notifications.py +17 -17
  389. udata/tests/test_owned.py +14 -14
  390. udata/tests/test_rdf.py +25 -23
  391. udata/tests/test_routing.py +89 -93
  392. udata/tests/test_storages.py +137 -128
  393. udata/tests/test_tags.py +44 -46
  394. udata/tests/test_topics.py +7 -7
  395. udata/tests/test_transfer.py +42 -49
  396. udata/tests/test_uris.py +160 -161
  397. udata/tests/test_utils.py +79 -71
  398. udata/tests/user/test_user_rdf.py +5 -9
  399. udata/tests/workers/test_jobs_commands.py +57 -58
  400. udata/tests/workers/test_tasks_routing.py +23 -29
  401. udata/tests/workers/test_workers_api.py +125 -131
  402. udata/tests/workers/test_workers_helpers.py +6 -6
  403. udata/tracking.py +4 -6
  404. udata/uris.py +45 -46
  405. udata/utils.py +68 -66
  406. udata/wsgi.py +1 -1
  407. {udata-9.1.2.dev30355.dist-info → udata-9.1.2.dev30454.dist-info}/METADATA +7 -3
  408. udata-9.1.2.dev30454.dist-info/RECORD +706 -0
  409. udata-9.1.2.dev30355.dist-info/RECORD +0 -704
  410. {udata-9.1.2.dev30355.dist-info → udata-9.1.2.dev30454.dist-info}/LICENSE +0 -0
  411. {udata-9.1.2.dev30355.dist-info → udata-9.1.2.dev30454.dist-info}/WHEEL +0 -0
  412. {udata-9.1.2.dev30355.dist-info → udata-9.1.2.dev30454.dist-info}/entry_points.txt +0 -0
  413. {udata-9.1.2.dev30355.dist-info → udata-9.1.2.dev30454.dist-info}/top_level.txt +0 -0
@@ -1,50 +1,70 @@
  import logging
-
  from datetime import datetime, timedelta
+ from pydoc import locate
  from urllib.parse import urlparse

+ import requests
  from blinker import signal
  from dateutil.parser import parse as parse_dt
  from flask import current_app
- from mongoengine import DynamicEmbeddedDocument, ValidationError as MongoEngineValidationError
- from mongoengine.signals import pre_save, post_save
+ from mongoengine import DynamicEmbeddedDocument
+ from mongoengine import ValidationError as MongoEngineValidationError
  from mongoengine.fields import DateTimeField
- from pydoc import locate
+ from mongoengine.signals import post_save, pre_save
  from stringdist import rdlevenshtein
  from werkzeug.utils import cached_property
- import requests

  from udata.app import cache
  from udata.core import storages
+ from udata.core.owned import Owned, OwnedQuerySet
  from udata.frontend.markdown import mdstrip
- from udata.models import db, WithMetrics, BadgeMixin, SpatialCoverage
- from udata.mongo.errors import FieldValidationError
  from udata.i18n import lazy_gettext as _
+ from udata.models import BadgeMixin, SpatialCoverage, WithMetrics, db
+ from udata.mongo.errors import FieldValidationError
  from udata.uris import ValidationError, endpoint_for
  from udata.uris import validate as validate_url
- from udata.core.owned import Owned, OwnedQuerySet
- from .constants import CHECKSUM_TYPES, CLOSED_FORMATS, DEFAULT_LICENSE, LEGACY_FREQUENCIES, MAX_DISTANCE, PIVOTAL_DATA, RESOURCE_FILETYPES, RESOURCE_TYPES, SCHEMA_CACHE_DURATION, UPDATE_FREQUENCIES
+ from udata.utils import get_by, hash_url, to_naive_datetime

- from .preview import get_preview_url
+ from .constants import (
+ CHECKSUM_TYPES,
+ CLOSED_FORMATS,
+ DEFAULT_LICENSE,
+ LEGACY_FREQUENCIES,
+ MAX_DISTANCE,
+ PIVOTAL_DATA,
+ RESOURCE_FILETYPES,
+ RESOURCE_TYPES,
+ SCHEMA_CACHE_DURATION,
+ UPDATE_FREQUENCIES,
+ )
  from .exceptions import (
- SchemasCatalogNotFoundException, SchemasCacheUnavailableException
+ SchemasCacheUnavailableException,
+ SchemasCatalogNotFoundException,
  )
+ from .preview import get_preview_url

- __all__ = ('License', 'Resource', 'Schema', 'Dataset', 'Checksum', 'CommunityResource', 'ResourceSchema')
+ __all__ = (
+ "License",
+ "Resource",
+ "Schema",
+ "Dataset",
+ "Checksum",
+ "CommunityResource",
+ "ResourceSchema",
+ )

- NON_ASSIGNABLE_SCHEMA_TYPES = ['datapackage']
+ NON_ASSIGNABLE_SCHEMA_TYPES = ["datapackage"]

  log = logging.getLogger(__name__)


  def get_json_ld_extra(key, value):
- '''Serialize an extras key, value pair into JSON-LD'''
- value = value.serialize() if hasattr(value, 'serialize') else value
+ """Serialize an extras key, value pair into JSON-LD"""
+ value = value.serialize() if hasattr(value, "serialize") else value
  return {
- '@type': 'http://schema.org/PropertyValue',
- 'name': key,
- 'value': value,
+ "@type": "http://schema.org/PropertyValue",
+ "name": key,
+ "value": value,
  }

@@ -75,6 +95,7 @@ class Schema(db.EmbeddedDocument):
  - Known schema: url is not set, name is set, version is maybe set
  - Unknown schema: url is set, name and version are maybe set
  """
+
  url = db.URLField()
  name = db.StringField()
  version = db.StringField()
@@ -91,20 +112,23 @@ class Schema(db.EmbeddedDocument):

  def to_dict(self):
  return {
- 'url': self.url,
- 'name': self.name,
- 'version': self.version,
+ "url": self.url,
+ "name": self.name,
+ "version": self.version,
  }

  def clean(self, **kwargs):
  super().clean()

- check_schema_in_catalog = kwargs.get('check_schema_in_catalog', False)
+ check_schema_in_catalog = kwargs.get("check_schema_in_catalog", False)

  if not self.url and not self.name:
  # There is no schema.
  if self.version:
- raise FieldValidationError(_('A schema must contains a name or an URL when a version is provided.'), field='version')
+ raise FieldValidationError(
+ _("A schema must contains a name or an URL when a version is provided."),
+ field="version",
+ )

  return

@@ -115,11 +139,11 @@ class Schema(db.EmbeddedDocument):
  self.url = None
  self.name = info[0]
  self.version = info[1]
-
+
  # Nothing more to do since an URL can point to anywhere and have a random name/version
  return

- # All the following checks are only run if there is
+ # All the following checks are only run if there is
  # some schemas in the catalog. If there is no catalog
  # or no schema in the catalog we do not check the validity
  # of the name and version
@@ -128,31 +152,35 @@ class Schema(db.EmbeddedDocument):
  return

  # We know this schema so we can do some checks
- existing_schema = next((schema for schema in catalog_schemas if schema['name'] == self.name), None)
+ existing_schema = next(
+ (schema for schema in catalog_schemas if schema["name"] == self.name), None
+ )

  if not existing_schema:
- message = _('Schema name "{schema}" is not an allowed value. Allowed values: {values}').format(
+ message = _(
+ 'Schema name "{schema}" is not an allowed value. Allowed values: {values}'
+ ).format(
  schema=self.name,
- values=', '.join(map(lambda schema: schema['name'], catalog_schemas))
+ values=", ".join(map(lambda schema: schema["name"], catalog_schemas)),
  )
  if check_schema_in_catalog:
- raise FieldValidationError(message, field='name')
+ raise FieldValidationError(message, field="name")
  else:
  log.warning(message)
  return

  if self.version:
- allowed_versions = list(map(lambda version: version['version_name'], existing_schema['versions']))
- allowed_versions.append('latest')
+ allowed_versions = list(
+ map(lambda version: version["version_name"], existing_schema["versions"])
+ )
+ allowed_versions.append("latest")

  if self.version not in allowed_versions:
- message = _('Version "{version}" is not an allowed value for the schema "{name}". Allowed versions: {values}').format(
- version=self.version,
- name=self.name,
- values=', '.join(allowed_versions)
- )
+ message = _(
+ 'Version "{version}" is not an allowed value for the schema "{name}". Allowed versions: {values}'
+ ).format(version=self.version, name=self.name, values=", ".join(allowed_versions))
  if check_schema_in_catalog:
- raise FieldValidationError(message, field='version')
+ raise FieldValidationError(message, field="version")
  else:
  log.warning(message)
  return
@@ -165,7 +193,7 @@ class License(db.Document):
  created_at = db.DateTimeField(default=datetime.utcnow, required=True)
  title = db.StringField(required=True)
  alternate_titles = db.ListField(db.StringField())
- slug = db.SlugField(required=True, populate_from='title')
+ slug = db.SlugField(required=True, populate_from="title")
  url = db.URLField()
  alternate_urls = db.ListField(db.URLField())
  maintainer = db.StringField()
@@ -178,34 +206,36 @@ class License(db.Document):

  @classmethod
  def guess(cls, *strings, **kwargs):
- '''
+ """
  Try to guess a license from a list of strings.

  Accept a `default` keyword argument which will be
  the default fallback license.
- '''
+ """
  license = None
  for string in strings:
  license = cls.guess_one(string)
  if license:
  break
- return license or kwargs.get('default')
+ return license or kwargs.get("default")

  @classmethod
  def guess_one(cls, text):
- '''
+ """
  Try to guess license from a string.

  Try to exact match on identifier then slugified title
  and fallback on edit distance ranking (after slugification)
- '''
+ """
  if not text:
  return
  qs = cls.objects
  text = text.strip().lower() # Stored identifiers are lower case
  slug = cls.slug.slugify(text) # Use slug as it normalize string
  license = qs(
- db.Q(id__iexact=text) | db.Q(slug=slug) | db.Q(url__iexact=text)
+ db.Q(id__iexact=text)
+ | db.Q(slug=slug)
+ | db.Q(url__iexact=text)
  | db.Q(alternate_urls__iexact=text)
  ).first()

@@ -218,9 +248,11 @@ class License(db.Document):
  pass
  else:
  parsed = urlparse(url)
- path = parsed.path.rstrip('/')
- query = f'{parsed.netloc}{path}'
- license = qs(db.Q(url__icontains=query) | db.Q(alternate_urls__contains=query)).first()
+ path = parsed.path.rstrip("/")
+ query = f"{parsed.netloc}{path}"
+ license = qs(
+ db.Q(url__icontains=query) | db.Q(alternate_urls__contains=query)
+ ).first()

  if license is None:
  # Try to single match `slug` with a low Damerau-Levenshtein distance
@@ -256,7 +288,7 @@ class License(db.Document):

  @classmethod
  def default(cls):
- return cls.objects(id=DEFAULT_LICENSE['id']).first()
+ return cls.objects(id=DEFAULT_LICENSE["id"]).first()


  class DatasetQuerySet(OwnedQuerySet):
@@ -264,9 +296,7 @@ class DatasetQuerySet(OwnedQuerySet):
  return self(private__ne=True, deleted=None, archived=None)

  def hidden(self):
- return self(db.Q(private=True) |
- db.Q(deleted__ne=None) |
- db.Q(archived__ne=None))
+ return self(db.Q(private=True) | db.Q(deleted__ne=None) | db.Q(archived__ne=None))


  class Checksum(db.EmbeddedDocument):
@@ -282,10 +312,8 @@ class ResourceMixin(object):
  id = db.AutoUUIDField(primary_key=True)
  title = db.StringField(verbose_name="Title", required=True)
  description = db.StringField()
- filetype = db.StringField(
- choices=list(RESOURCE_FILETYPES), default='file', required=True)
- type = db.StringField(
- choices=list(RESOURCE_TYPES), default='main', required=True)
+ filetype = db.StringField(choices=list(RESOURCE_FILETYPES), default="file", required=True)
+ type = db.StringField(choices=list(RESOURCE_TYPES), default="main", required=True)
  url = db.URLField(required=True)
  urlhash = db.StringField()
  checksum = db.EmbeddedDocumentField(Checksum)
@@ -296,7 +324,7 @@ class ResourceMixin(object):
  extras = db.ExtrasField()
  harvest = db.EmbeddedDocumentField(HarvestResourceMetadata)
  schema = db.EmbeddedDocumentField(Schema)
-
+
  created_at_internal = db.DateTimeField(default=datetime.utcnow, required=True)
  last_modified_internal = db.DateTimeField(default=datetime.utcnow, required=True)
  deleted = db.DateTimeField()
@@ -304,25 +332,33 @@ class ResourceMixin(object):
  @property
  def internal(self):
  return {
- 'created_at_internal': self.created_at_internal,
- 'last_modified_internal': self.last_modified_internal
+ "created_at_internal": self.created_at_internal,
+ "last_modified_internal": self.last_modified_internal,
  }

  @property
  def created_at(self):
- return self.harvest.created_at if self.harvest and self.harvest.created_at else self.created_at_internal
+ return (
+ self.harvest.created_at
+ if self.harvest and self.harvest.created_at
+ else self.created_at_internal
+ )

  @property
  def last_modified(self):
- if self.harvest and self.harvest.modified_at and to_naive_datetime(self.harvest.modified_at) < datetime.utcnow():
+ if (
+ self.harvest
+ and self.harvest.modified_at
+ and to_naive_datetime(self.harvest.modified_at) < datetime.utcnow()
+ ):
  return to_naive_datetime(self.harvest.modified_at)
- if self.filetype == 'remote' and self.extras.get('analysis:last-modified-at'):
- return to_naive_datetime(self.extras.get('analysis:last-modified-at'))
+ if self.filetype == "remote" and self.extras.get("analysis:last-modified-at"):
+ return to_naive_datetime(self.extras.get("analysis:last-modified-at"))
  return self.last_modified_internal

  def clean(self):
  super(ResourceMixin, self).clean()
- if not self.urlhash or 'url' in self._get_changed_fields():
+ if not self.urlhash or "url" in self._get_changed_fields():
  self.urlhash = hash_url(self.url)

  @cached_property # Accessed at least 2 times in front rendering
@@ -338,40 +374,40 @@ class ResourceMixin(object):
  return not self.format or self.format.lower() in CLOSED_FORMATS

  def check_availability(self):
- '''
+ """
  Return the check status from extras if any.

  NB: `unknown` will evaluate to True in the aggregate checks using
  `all([])` (dataset, organization, user).
- '''
- return self.extras.get('check:available', 'unknown')
+ """
+ return self.extras.get("check:available", "unknown")

  def need_check(self):
- '''Does the resource needs to be checked against its linkchecker?
+ """Does the resource needs to be checked against its linkchecker?

  We check unavailable resources often, unless they go over the
  threshold. Available resources are checked less and less frequently
  based on their historical availability.
- '''
+ """
  min_cache_duration, max_cache_duration, ko_threshold = [
- current_app.config.get(k) for k in (
- 'LINKCHECKING_MIN_CACHE_DURATION',
- 'LINKCHECKING_MAX_CACHE_DURATION',
- 'LINKCHECKING_UNAVAILABLE_THRESHOLD',
+ current_app.config.get(k)
+ for k in (
+ "LINKCHECKING_MIN_CACHE_DURATION",
+ "LINKCHECKING_MAX_CACHE_DURATION",
+ "LINKCHECKING_UNAVAILABLE_THRESHOLD",
  )
  ]
- count_availability = self.extras.get('check:count-availability', 1)
+ count_availability = self.extras.get("check:count-availability", 1)
  is_available = self.check_availability()
- if is_available == 'unknown':
+ if is_available == "unknown":
  return True
  elif is_available or count_availability > ko_threshold:
- delta = min(min_cache_duration * count_availability,
- max_cache_duration)
+ delta = min(min_cache_duration * count_availability, max_cache_duration)
  else:
  delta = min_cache_duration
- if self.extras.get('check:date'):
+ if self.extras.get("check:date"):
  limit_date = datetime.utcnow() - timedelta(minutes=delta)
- check_date = self.extras['check:date']
+ check_date = self.extras["check:date"]
  if not isinstance(check_date, datetime):
  try:
  check_date = parse_dt(check_date)
@@ -383,62 +419,63 @@ class ResourceMixin(object):

  @property
  def latest(self):
- '''
+ """
  Permanent link to the latest version of this resource.

  If this resource is updated and `url` changes, this property won't.
- '''
- return endpoint_for('datasets.resource', 'api.resource_redirect', id=self.id, _external=True)
+ """
+ return endpoint_for(
+ "datasets.resource", "api.resource_redirect", id=self.id, _external=True
+ )

  @cached_property
  def json_ld(self):
-
  result = {
- '@type': 'DataDownload',
- '@id': str(self.id),
- 'url': self.latest,
- 'name': self.title or _('Nameless resource'),
- 'contentUrl': self.url,
- 'dateCreated': self.created_at.isoformat(),
- 'dateModified': self.last_modified.isoformat(),
- 'extras': [get_json_ld_extra(*item)
- for item in self.extras.items()],
+ "@type": "DataDownload",
+ "@id": str(self.id),
+ "url": self.latest,
+ "name": self.title or _("Nameless resource"),
+ "contentUrl": self.url,
+ "dateCreated": self.created_at.isoformat(),
+ "dateModified": self.last_modified.isoformat(),
+ "extras": [get_json_ld_extra(*item) for item in self.extras.items()],
  }

- if 'views' in self.metrics:
- result['interactionStatistic'] = {
- '@type': 'InteractionCounter',
- 'interactionType': {
- '@type': 'DownloadAction',
+ if "views" in self.metrics:
+ result["interactionStatistic"] = {
+ "@type": "InteractionCounter",
+ "interactionType": {
+ "@type": "DownloadAction",
  },
- 'userInteractionCount': self.metrics['views']
+ "userInteractionCount": self.metrics["views"],
  }

  if self.format:
- result['encodingFormat'] = self.format
+ result["encodingFormat"] = self.format

  if self.filesize:
- result['contentSize'] = self.filesize
+ result["contentSize"] = self.filesize

  if self.mime:
- result['fileFormat'] = self.mime
+ result["fileFormat"] = self.mime

  if self.description:
- result['description'] = mdstrip(self.description)
+ result["description"] = mdstrip(self.description)

  return result


  class Resource(ResourceMixin, WithMetrics, db.EmbeddedDocument):
- '''
+ """
  Local file, remote file or API provided by the original provider of the
  dataset
- '''
- on_added = signal('Resource.on_added')
- on_deleted = signal('Resource.on_deleted')
+ """
+
+ on_added = signal("Resource.on_added")
+ on_deleted = signal("Resource.on_deleted")

  __metrics_keys__ = [
- 'views',
+ "views",
  ]

  @property
@@ -447,13 +484,15 @@ class Resource(ResourceMixin, WithMetrics, db.EmbeddedDocument):
  self._instance.id # try to access attr from parent instance
  return self._instance
  except ReferenceError: # weakly-referenced object no longer exists
- log.warning('Weakly referenced object for resource.dataset no longer exists, '
- 'using a poor performance query instead.')
+ log.warning(
+ "Weakly referenced object for resource.dataset no longer exists, "
+ "using a poor performance query instead."
+ )
  return Dataset.objects(resources__id=self.id).first()

  def save(self, *args, **kwargs):
  if not self.dataset:
- raise RuntimeError('Impossible to save an orphan resource')
+ raise RuntimeError("Impossible to save an orphan resource")
  self.dataset.save(*args, **kwargs)


@@ -462,17 +501,18 @@ class Dataset(WithMetrics, BadgeMixin, Owned, db.Document):
  acronym = db.StringField(max_length=128)
  # /!\ do not set directly the slug when creating or updating a dataset
  # this will break the search indexation
- slug = db.SlugField(max_length=255, required=True, populate_from='title',
- update=True, follow=True)
- description = db.StringField(required=True, default='')
- license = db.ReferenceField('License')
+ slug = db.SlugField(
+ max_length=255, required=True, populate_from="title", update=True, follow=True
+ )
+ description = db.StringField(required=True, default="")
+ license = db.ReferenceField("License")

  tags = db.TagListField()
  resources = db.ListField(db.EmbeddedDocumentField(Resource))

  private = db.BooleanField(default=False)
  frequency = db.StringField(choices=list(UPDATE_FREQUENCIES.keys()))
- frequency_date = db.DateTimeField(verbose_name=_('Future date of update'))
+ frequency_date = db.DateTimeField(verbose_name=_("Future date of update"))
  temporal_coverage = db.EmbeddedDocumentField(db.DateRange)
  spatial = db.EmbeddedDocumentField(SpatialCoverage)
  schema = db.EmbeddedDocumentField(Schema)
@@ -483,60 +523,63 @@ class Dataset(WithMetrics, BadgeMixin, Owned, db.Document):

  featured = db.BooleanField(required=True, default=False)

- contact_point = db.ReferenceField('ContactPoint', reverse_delete_rule=db.NULLIFY)
+ contact_point = db.ReferenceField("ContactPoint", reverse_delete_rule=db.NULLIFY)

- created_at_internal = DateTimeField(verbose_name=_('Creation date'),
- default=datetime.utcnow, required=True)
- last_modified_internal = DateTimeField(verbose_name=_('Last modification date'),
- default=datetime.utcnow, required=True)
+ created_at_internal = DateTimeField(
+ verbose_name=_("Creation date"), default=datetime.utcnow, required=True
+ )
+ last_modified_internal = DateTimeField(
+ verbose_name=_("Last modification date"), default=datetime.utcnow, required=True
+ )
  deleted = db.DateTimeField()
  archived = db.DateTimeField()

  def __str__(self):
- return self.title or ''
+ return self.title or ""

  __badges__ = {
- PIVOTAL_DATA: _('Pivotal data'),
+ PIVOTAL_DATA: _("Pivotal data"),
  }

  __metrics_keys__ = [
- 'discussions',
- 'reuses',
- 'followers',
- 'views',
- 'resources_downloads',
+ "discussions",
+ "reuses",
+ "followers",
+ "views",
+ "resources_downloads",
  ]

  meta = {
- 'indexes': [
- '$title',
- 'created_at_internal',
- 'last_modified_internal',
- 'metrics.reuses',
- 'metrics.followers',
- 'metrics.views',
- 'slug',
- 'resources.id',
- 'resources.urlhash',
- ] + Owned.meta['indexes'],
- 'ordering': ['-created_at_internal'],
- 'queryset_class': DatasetQuerySet,
- 'auto_create_index_on_save': True
+ "indexes": [
+ "$title",
+ "created_at_internal",
+ "last_modified_internal",
+ "metrics.reuses",
+ "metrics.followers",
+ "metrics.views",
+ "slug",
+ "resources.id",
+ "resources.urlhash",
+ ]
+ + Owned.meta["indexes"],
+ "ordering": ["-created_at_internal"],
+ "queryset_class": DatasetQuerySet,
+ "auto_create_index_on_save": True,
  }

- before_save = signal('Dataset.before_save')
- after_save = signal('Dataset.after_save')
- on_create = signal('Dataset.on_create')
- on_update = signal('Dataset.on_update')
- before_delete = signal('Dataset.before_delete')
- after_delete = signal('Dataset.after_delete')
- on_delete = signal('Dataset.on_delete')
- on_archive = signal('Dataset.on_archive')
- on_resource_added = signal('Dataset.on_resource_added')
- on_resource_updated = signal('Dataset.on_resource_updated')
- on_resource_removed = signal('Dataset.on_resource_removed')
+ before_save = signal("Dataset.before_save")
+ after_save = signal("Dataset.after_save")
+ on_create = signal("Dataset.on_create")
+ on_update = signal("Dataset.on_update")
+ before_delete = signal("Dataset.before_delete")
+ after_delete = signal("Dataset.after_delete")
+ on_delete = signal("Dataset.on_delete")
+ on_archive = signal("Dataset.on_archive")
+ on_resource_added = signal("Dataset.on_resource_added")
+ on_resource_updated = signal("Dataset.on_resource_updated")
+ on_resource_removed = signal("Dataset.on_resource_removed")

- verbose_name = _('dataset')
+ verbose_name = _("dataset")

  @classmethod
  def pre_save(cls, sender, document, **kwargs):
@@ -544,10 +587,10 @@ class Dataset(WithMetrics, BadgeMixin, Owned, db.Document):

  @classmethod
  def post_save(cls, sender, document, **kwargs):
- if 'post_save' in kwargs.get('ignores', []):
+ if "post_save" in kwargs.get("ignores", []):
  return
  cls.after_save.send(document)
- if kwargs.get('created'):
+ if kwargs.get("created"):
  cls.on_create.send(document)
  else:
  cls.on_update.send(document)
@@ -560,32 +603,36 @@ class Dataset(WithMetrics, BadgeMixin, Owned, db.Document):
  self.frequency = LEGACY_FREQUENCIES[self.frequency]

  for key, value in self.extras.items():
- if not key.startswith('custom:'):
+ if not key.startswith("custom:"):
  continue
  if not self.organization:
  raise MongoEngineValidationError(
- 'Custom metadatas are only accessible to dataset owned by on organization.')
- custom_meta = key.split(':')[1]
- org_custom = self.organization.extras.get('custom', [])
+ "Custom metadatas are only accessible to dataset owned by on organization."
+ )
+ custom_meta = key.split(":")[1]
+ org_custom = self.organization.extras.get("custom", [])
  custom_present = False
  for custom in org_custom:
- if custom['title'] != custom_meta:
+ if custom["title"] != custom_meta:
  continue
  custom_present = True
- if custom['type'] == 'choice':
- if value not in custom['choices']:
+ if custom["type"] == "choice":
+ if value not in custom["choices"]:
  raise MongoEngineValidationError(
- 'Custom metadata choice is not defined by organization.')
+ "Custom metadata choice is not defined by organization."
+ )
  else:
- if not isinstance(value, locate(custom['type'])):
+ if not isinstance(value, locate(custom["type"])):
  raise MongoEngineValidationError(
- 'Custom metadata is not of the right type.')
+ "Custom metadata is not of the right type."
+ )
  if not custom_present:
  raise MongoEngineValidationError(
- 'Dataset\'s organization did not define the requested custom metadata.')
+ "Dataset's organization did not define the requested custom metadata."
+ )

  def url_for(self, *args, **kwargs):
- return endpoint_for('datasets.show', 'api.dataset', dataset=self, *args, **kwargs)
+ return endpoint_for("datasets.show", "api.dataset", dataset=self, *args, **kwargs)

  display_url = property(url_for)

@@ -601,7 +648,7 @@ class Dataset(WithMetrics, BadgeMixin, Owned, db.Document):
  def full_title(self):
  if not self.acronym:
  return self.title
- return '{title} ({acronym})'.format(**self._data)
+ return "{title} ({acronym})".format(**self._data)

  @property
  def external_url(self):
@@ -616,8 +663,7 @@ class Dataset(WithMetrics, BadgeMixin, Owned, db.Document):

  @property
  def frequency_label(self):
- return UPDATE_FREQUENCIES.get(self.frequency or 'unknown',
- UPDATE_FREQUENCIES['unknown'])
+ return UPDATE_FREQUENCIES.get(self.frequency or "unknown", UPDATE_FREQUENCIES["unknown"])

  def check_availability(self):
  """Check if resources from that dataset are available.
@@ -625,21 +671,28 @@ class Dataset(WithMetrics, BadgeMixin, Owned, db.Document):
  Return a list of (boolean or 'unknown')
  """
  # Only check remote resources.
- remote_resources = [resource
- for resource in self.resources
- if resource.filetype == 'remote']
+ remote_resources = [
+ resource for resource in self.resources if resource.filetype == "remote"
+ ]
  if not remote_resources:
  return []
  return [resource.check_availability() for resource in remote_resources]

  @property
  def created_at(self):
- return self.harvest.created_at if self.harvest and self.harvest.created_at else self.created_at_internal
+ return (
+ self.harvest.created_at
+ if self.harvest and self.harvest.created_at
+ else self.created_at_internal
+ )

  @property
  def last_modified(self):
- if (self.harvest and self.harvest.modified_at and
- to_naive_datetime(self.harvest.modified_at) < datetime.utcnow()):
+ if (
+ self.harvest
+ and self.harvest.modified_at
+ and to_naive_datetime(self.harvest.modified_at) < datetime.utcnow()
+ ):
  return to_naive_datetime(self.harvest.modified_at)
  return self.last_modified_internal

@@ -669,27 +722,27 @@ class Dataset(WithMetrics, BadgeMixin, Owned, db.Document):
  every 8 hours, but is maximum 24 hours later.
  """
  delta = None
- if self.frequency == 'hourly':
+ if self.frequency == "hourly":
  delta = timedelta(hours=1)
- elif self.frequency in ['fourTimesADay', 'threeTimesADay', 'semidaily', 'daily']:
+ elif self.frequency in ["fourTimesADay", "threeTimesADay", "semidaily", "daily"]:
  delta = timedelta(days=1)
- elif self.frequency in ['fourTimesAWeek', 'threeTimesAWeek', 'semiweekly', 'weekly']:
+ elif self.frequency in ["fourTimesAWeek", "threeTimesAWeek", "semiweekly", "weekly"]:
  delta = timedelta(weeks=1)
- elif self.frequency == 'biweekly':
+ elif self.frequency == "biweekly":
  delta = timedelta(weeks=2)
- elif self.frequency in ['threeTimesAMonth', 'semimonthly', 'monthly']:
+ elif self.frequency in ["threeTimesAMonth", "semimonthly", "monthly"]:
  delta = timedelta(days=31)
- elif self.frequency == 'bimonthly':
+ elif self.frequency == "bimonthly":
  delta = timedelta(days=31 * 2)
- elif self.frequency == 'quarterly':
+ elif self.frequency == "quarterly":
  delta = timedelta(days=365 / 4)
- elif self.frequency in ['threeTimesAYear', 'semiannual', 'annual']:
+ elif self.frequency in ["threeTimesAYear", "semiannual", "annual"]:
  delta = timedelta(days=365)
- elif self.frequency == 'biennial':
+ elif self.frequency == "biennial":
  delta = timedelta(days=365 * 2)
- elif self.frequency == 'triennial':
+ elif self.frequency == "triennial":
  delta = timedelta(days=365 * 3)
- elif self.frequency == 'quinquennial':
+ elif self.frequency == "quinquennial":
  delta = timedelta(days=365 * 5)
  if delta is None:
  return
@@ -711,48 +764,49 @@ class Dataset(WithMetrics, BadgeMixin, Owned, db.Document):
  # Quality is only relevant on saved Datasets
  return result

- result['license'] = True if self.license else False
- result['temporal_coverage'] = True if self.temporal_coverage else False
- result['spatial'] = True if self.spatial else False
+ result["license"] = True if self.license else False
+ result["temporal_coverage"] = True if self.temporal_coverage else False
+ result["spatial"] = True if self.spatial else False

- result['update_frequency'] = self.frequency and self.frequency != 'unknown'
+ result["update_frequency"] = self.frequency and self.frequency != "unknown"
  if self.next_update:
  # Allow for being one day late on update.
  # We may have up to one day delay due to harvesting for example
- result['update_fulfilled_in_time'] = (
+ result["update_fulfilled_in_time"] = (
  True if (self.next_update - datetime.utcnow()).days >= -1 else False
  )
- elif self.frequency in ['continuous', 'irregular', 'punctual']:
+ elif self.frequency in ["continuous", "irregular", "punctual"]:
  # For these frequencies, we don't expect regular updates or can't quantify them.
  # Thus we consider the update_fulfilled_in_time quality criterion to be true.
- result['update_fulfilled_in_time'] = True
+ result["update_fulfilled_in_time"] = True

- result['dataset_description_quality'] = (
- True if len(self.description) > current_app.config.get('QUALITY_DESCRIPTION_LENGTH')
+ result["dataset_description_quality"] = (
+ True
+ if len(self.description) > current_app.config.get("QUALITY_DESCRIPTION_LENGTH")
  else False
  )

  if self.resources:
- result['has_resources'] = True
- result['has_open_format'] = not all(
- resource.closed_or_no_format for resource in self.resources)
- result['all_resources_available'] = all(self.check_availability())
+ result["has_resources"] = True
+ result["has_open_format"] = not all(
+ resource.closed_or_no_format for resource in self.resources
+ )
+ result["all_resources_available"] = all(self.check_availability())
  resource_doc = False
  resource_desc = False
  for resource in self.resources:
- if resource.type == 'documentation':
+ if resource.type == "documentation":
  resource_doc = True
  if resource.description:
  resource_desc = True
- result['resources_documentation'] = resource_doc or resource_desc
+ result["resources_documentation"] = resource_doc or resource_desc

- result['score'] = self.compute_quality_score(result)
+ result["score"] = self.compute_quality_score(result)
  return result
-
+
  @property
  def downloads(self):
- return sum(resource.metrics.get('views', 0) for resource in self.resources)
-
+ return sum(resource.metrics.get("views", 0) for resource in self.resources)

  @staticmethod
  def normalize_score(score):
@@ -772,25 +826,25 @@ class Dataset(WithMetrics, BadgeMixin, Owned, db.Document):
  """
  score = 0
  UNIT = 1
- if quality['license']:
+ if quality["license"]:
  score += UNIT
- if quality['temporal_coverage']:
+ if quality["temporal_coverage"]:
  score += UNIT
- if quality['spatial']:
+ if quality["spatial"]:
  score += UNIT
- if quality['update_frequency']:
+ if quality["update_frequency"]:
  score += UNIT
- if 'update_fulfilled_in_time' in quality:
- if quality['update_fulfilled_in_time']:
+ if "update_fulfilled_in_time" in quality:
+ if quality["update_fulfilled_in_time"]:
  score += UNIT
- if quality['dataset_description_quality']:
+ if quality["dataset_description_quality"]:
  score += UNIT
- if 'has_resources' in quality:
- if quality['has_open_format']:
+ if "has_resources" in quality:
+ if quality["has_open_format"]:
  score += UNIT
- if quality['all_resources_available']:
+ if quality["all_resources_available"]:
  score += UNIT
- if quality['resources_documentation']:
+ if quality["resources_documentation"]:
  score += UNIT
  return self.normalize_score(score)

@@ -800,25 +854,18 @@ class Dataset(WithMetrics, BadgeMixin, Owned, db.Document):
  return obj or cls.objects.get_or_404(id=id_or_slug)

  def add_resource(self, resource):
- '''Perform an atomic prepend for a new resource'''
+ """Perform an atomic prepend for a new resource"""
  resource.validate()
- self.update(__raw__={
- '$push': {
- 'resources': {
- '$each': [resource.to_mongo()],
- '$position': 0
- }
- }
- })
+ self.update(
+ __raw__={"$push": {"resources": {"$each": [resource.to_mongo()], "$position": 0}}}
+ )
  self.reload()
  self.on_resource_added.send(self.__class__, document=self, resource_id=resource.id)

  def update_resource(self, resource):
- '''Perform an atomic update for an existing resource'''
+ """Perform an atomic update for an existing resource"""
  index = self.resources.index(resource)
- data = {
- 'resources__{index}'.format(index=index): resource
- }
+ data = {"resources__{index}".format(index=index): resource}
  self.update(**data)
  self.reload()
  self.on_resource_updated.send(self.__class__, document=self, resource_id=resource.id)
@@ -838,29 +885,26 @@ class Dataset(WithMetrics, BadgeMixin, Owned, db.Document):
  @cached_property
  def json_ld(self):
  result = {
- '@context': 'http://schema.org',
- '@type': 'Dataset',
- '@id': str(self.id),
- 'alternateName': self.slug,
- 'dateCreated': self.created_at.isoformat(),
- 'dateModified': self.last_modified.isoformat(),
- 'url': endpoint_for('datasets.show', 'api.dataset', dataset=self, _external=True),
- 'name': self.title,
- 'keywords': ','.join(self.tags),
- 'distribution': [resource.json_ld for resource in self.resources],
+ "@context": "http://schema.org",
+ "@type": "Dataset",
+ "@id": str(self.id),
+ "alternateName": self.slug,
+ "dateCreated": self.created_at.isoformat(),
+ "dateModified": self.last_modified.isoformat(),
+ "url": endpoint_for("datasets.show", "api.dataset", dataset=self, _external=True),
+ "name": self.title,
+ "keywords": ",".join(self.tags),
+ "distribution": [resource.json_ld for resource in self.resources],
  # Theses values are not standard
- 'contributedDistribution': [
- resource.json_ld for resource in self.community_resources
- ],
- 'extras': [get_json_ld_extra(*item)
- for item in self.extras.items()],
+ "contributedDistribution": [resource.json_ld for resource in self.community_resources],
+ "extras": [get_json_ld_extra(*item) for item in self.extras.items()],
  }

  if self.description:
- result['description'] = mdstrip(self.description)
+ result["description"] = mdstrip(self.description)

  if self.license and self.license.url:
- result['license'] = self.license.url
+ result["license"] = self.license.url

  if self.organization:
  author = self.organization.json_ld
@@ -870,34 +914,37 @@ class Dataset(WithMetrics, BadgeMixin, Owned, db.Document):
             author = None
 
         if author:
-            result['author'] = author
+            result["author"] = author
 
         return result
 
     @property
     def internal(self):
         return {
-            'created_at_internal': self.created_at_internal,
-            'last_modified_internal': self.last_modified_internal
+            "created_at_internal": self.created_at_internal,
+            "last_modified_internal": self.last_modified_internal,
         }
 
     @property
    def views_count(self):
-        return self.metrics.get('views', 0)
+        return self.metrics.get("views", 0)
 
     def count_discussions(self):
         from udata.models import Discussion
-        self.metrics['discussions'] = Discussion.objects(subject=self, closed=None).count()
+
+        self.metrics["discussions"] = Discussion.objects(subject=self, closed=None).count()
         self.save()
 
     def count_reuses(self):
         from udata.models import Reuse
-        self.metrics['reuses'] = Reuse.objects(datasets=self).visible().count()
+
+        self.metrics["reuses"] = Reuse.objects(datasets=self).visible().count()
         self.save()
 
     def count_followers(self):
         from udata.models import Follow
-        self.metrics['followers'] = Follow.objects(until=None).followers(self).count()
+
+        self.metrics["followers"] = Follow.objects(until=None).followers(self).count()
         self.save()
 
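Note: the json_ld property in the hunks above serializes a dataset as a schema.org Dataset document. For orientation, a purely illustrative example of the shape it produces — every value below is a placeholder, not data from any real dataset:

# Illustrative output shape of Dataset.json_ld; all values are made up.
example_json_ld = {
    "@context": "http://schema.org",
    "@type": "Dataset",
    "@id": "5f1b2c3d4e5f6a7b8c9d0e1f",
    "alternateName": "example-dataset",
    "dateCreated": "2024-01-01T00:00:00",
    "dateModified": "2024-06-01T12:00:00",
    "url": "https://example.org/datasets/example-dataset/",
    "name": "Example dataset",
    "keywords": "tag1,tag2",
    "distribution": [],             # one entry per resource.json_ld
    "contributedDistribution": [],  # one entry per community resource
    "extras": [],
    "description": "Plain-text description (markdown stripped)",
    "license": "https://example.org/licenses/example",
}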
 
@@ -906,84 +953,93 @@ post_save.connect(Dataset.post_save, sender=Dataset)
 
 
 class CommunityResource(ResourceMixin, WithMetrics, Owned, db.Document):
-    '''
+    """
     Local file, remote file or API added by the community of the users to the
     original dataset
-    '''
+    """
+
     dataset = db.ReferenceField(Dataset, reverse_delete_rule=db.NULLIFY)
 
     __metrics_keys__ = [
-        'views',
+        "views",
     ]
 
     meta = {
-        'ordering': ['-created_at_internal'],
-        'queryset_class': OwnedQuerySet,
+        "ordering": ["-created_at_internal"],
+        "queryset_class": OwnedQuerySet,
    }
 
     @property
     def from_community(self):
         return True
 
+
 class ResourceSchema(object):
     @staticmethod
     @cache.memoize(timeout=SCHEMA_CACHE_DURATION)
     def all():
-        '''
+        """
         Get a list of schemas from a schema catalog endpoint.
 
         This has a double layer of cache:
         - @cache.cached decorator w/ short lived cache for normal operations
         - a long terme cache w/o timeout to be able to always render some content
-        '''
-        endpoint = current_app.config.get('SCHEMA_CATALOG_URL')
+        """
+        endpoint = current_app.config.get("SCHEMA_CATALOG_URL")
         if endpoint is None:
             return []
 
-        cache_key = 'schema-catalog-objects'
+        cache_key = "schema-catalog-objects"
         try:
             response = requests.get(endpoint, timeout=5)
             # do not cache 404 and forward status code
             if response.status_code == 404:
-                raise SchemasCatalogNotFoundException(f'Schemas catalog does not exist at {endpoint}')
+                raise SchemasCatalogNotFoundException(
+                    f"Schemas catalog does not exist at {endpoint}"
+                )
             response.raise_for_status()
         except requests.exceptions.RequestException as e:
-            log.exception(f'Error while getting schema catalog from {endpoint}')
+            log.exception(f"Error while getting schema catalog from {endpoint}")
             schemas = cache.get(cache_key)
         else:
-            schemas = response.json().get('schemas', [])
+            schemas = response.json().get("schemas", [])
             cache.set(cache_key, schemas)
         # no cached version or no content
         if not schemas:
-            log.error(f'No content found inc. from cache for schema catalog')
-            raise SchemasCacheUnavailableException('No content in cache for schema catalog')
+            log.error(f"No content found inc. from cache for schema catalog")
+            raise SchemasCacheUnavailableException("No content in cache for schema catalog")
 
         return schemas
-
+
     def assignable_schemas():
-        return [s for s in ResourceSchema.all() if s.get('schema_type') not in NON_ASSIGNABLE_SCHEMA_TYPES]
+        return [
+            s
+            for s in ResourceSchema.all()
+            if s.get("schema_type") not in NON_ASSIGNABLE_SCHEMA_TYPES
+        ]
 
     def get_existing_schema_info_by_url(url: str) -> tuple[str, str | None] | None:
-        '''
+        """
         Returns the name and the version if exists
-        '''
+        """
        for schema in ResourceSchema.all():
-            for version in schema['versions']:
-                if version['schema_url'] == url:
-                    return schema['name'], version['version_name']
+            for version in schema["versions"]:
+                if version["schema_url"] == url:
+                    return schema["name"], version["version_name"]
 
-            if schema['schema_url'] == url:
+            if schema["schema_url"] == url:
                # The main schema URL is often the 'latest' version but
                # not sure if it's mandatory everywhere so set the version to
                # None here.
-                return schema['name'], None
+                return schema["name"], None
 
         return None
-
+
+
 def get_resource(id):
-    '''Fetch a resource given its UUID'''
+    """Fetch a resource given its UUID"""
     dataset = Dataset.objects(resources__id=id).first()
     if dataset:
-        return get_by(dataset.resources, 'id', id)
+        return get_by(dataset.resources, "id", id)
     else:
         return CommunityResource.objects(id=id).first()
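Note: ResourceSchema.all() above pairs a short-lived @cache.memoize with a long-lived cache key that is refreshed on every successful fetch and read back when the catalog request fails. A minimal, framework-agnostic sketch of that fallback pattern — the cache object, key name and exception type below are placeholders, not the udata API:

# Sketch of the fetch-then-fall-back-to-long-term-cache pattern used above.
import logging

import requests

log = logging.getLogger(__name__)
LONG_TERM_KEY = "schema-catalog-objects"  # placeholder cache key


def fetch_schemas(endpoint: str, cache) -> list:
    try:
        response = requests.get(endpoint, timeout=5)
        response.raise_for_status()
    except requests.exceptions.RequestException:
        log.exception("Schema catalog unreachable, falling back to cached copy")
        schemas = cache.get(LONG_TERM_KEY)  # may be None if never fetched successfully
    else:
        schemas = response.json().get("schemas", [])
        cache.set(LONG_TERM_KEY, schemas)  # refresh the long-lived copy
    if not schemas:
        raise RuntimeError("No schema catalog available, even from cache")
    return schemas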